R Data types • BasicR

Primitive types

Numeric and integer

x <- 1
y <- 2L

cat("R is non-declarative. It means we don't have to declare the type of variables before creating them. \n It also means that the class (type) of the variables will be assigned automatically!")

R is non-declarative. It means we don’t have to declare the type of variables before creating them. It also means that the class (type) of the variables will be assigned automatically!

class(x)

[1] "numeric"

class(y)

[1] "integer"

cat("Change the class of a variable - coercing \n")

Change the class of a variable - coercing

class(as.integer(x))

[1] "integer"

z <- 1.314
v <- 1.678
v2 <- 2.1
class(z)

[1] "numeric"

print(as.integer(z))

[1] 1

print(as.integer(v))

[1] 1

print(as.integer(v2))

[1] 2

Character

a <- "a"
a1 <- 'b'

b <- "The red fox"
class(a)

## [1] "character"

class(a1)

## [1] "character"

class(b)

## [1] "character"

#"Careful!"
c <- "1"
class(c)

## [1] "character"

Logical

a <- TRUE
b <- FALSE

print(a)

## [1] TRUE

class(a)

## [1] "logical"

#BUT!

c <- "TRUE"

class(c)

## [1] "character"

print(as.integer(a))

## [1] 1

print(as.integer(b))

## [1] 0

a <- as.logical(0L)
b <- as.logical(1)
print(a)

## [1] FALSE

print(b)

## [1] TRUE

Operators

Arithmetic operators

Operator	Description
+	Addition
-	Subtraction
*	Multiplication
/	Division
^	Exponent
%%	Modulus (Remainder from division)
%/%	Integer Division

Relational operators

Operator	Description
<	Less than
>	Greater than
<=	Less than or equal to
>=	Greater than or equal to
==	Equal to
!=	Not equal to

Logical operators

Operator	Description
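!	Logical NOT
&	Element-wise logical AND
&&	Scalar logical AND (single values, e.g. in if conditions)
|	Element-wise logical OR
||	Scalar logical OR (single values, e.g. in if conditions)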

Assignment operators

Operator	Description
<-, <<-, =	Leftwards assignment
->, ->>	Rightwards assignment

Operations with primitive types

num1 <- 15.12
num2 <- 6.54
num3 <- 1

char1 <- "a"
char2 <- "b"
char3 <- "1"

logic1 <- TRUE
logic2 <- FALSE

num1+num2

## [1] 21.66

num1-num2

## [1] 8.58

num1*num2

## [1] 98.8848

num1/num2

## [1] 2.311927

num2^2

## [1] 42.7716

num1%%num2

## [1] 2.04

num1%/%num2

## [1] 2

try(char1+num1)

## Error in char1 + num1 : non-numeric argument to binary operator

try(char3+num1)

## Error in char3 + num1 : non-numeric argument to binary operator

try(as.numeric(char1)+num1)

## Warning in doTryCatch(return(expr), name, parentenv, handler): NAs introduced by
## coercion

## [1] NA

try(as.numeric(char3)+num1)

## [1] 16.12

try(logic1+logic2)

## [1] 1

try(num1+logic1)

## [1] 16.12

num1<num2

## [1] FALSE

num1!=num2

## [1] TRUE

num1==num2

## [1] FALSE

try(char1<char2)

## [1] TRUE

# alphabetical order 
try(char1==char2)

## [1] FALSE

try(char1!=char2)

## [1] TRUE

#But!

try(char1>num2)

## [1] TRUE

try(char1<=num3)

## [1] FALSE

try(char3<=num3)

## [1] TRUE

logic1<logic2

## [1] FALSE

logic1!=logic2

## [1] TRUE

logic1==logic2

## [1] FALSE

#Logical operators

TRUE & FALSE

## [1] FALSE

TRUE | FALSE

## [1] TRUE

!TRUE

## [1] FALSE

# Floating-point arithmetic is inexact: 0.5 - 0.3 and 0.3 - 0.1 are stored as
# slightly different binary doubles, so `==` reports them as different.
# This comes from how doubles are represented, not from a locale setting.

x1 <- 0.5 - 0.3
x2 <- 0.3 - 0.1
x1 == x2

## [1] FALSE

# Compare with a tolerance instead. all.equal() returns either TRUE or a
# character description of the difference, so wrap it in isTRUE() to always
# get a single logical value.
isTRUE(all.equal(x1, x2))

## [1] TRUE

#concatenate characters

paste(char1, char2)

## [1] "a b"

paste(char1, char2, sep="_")

## [1] "a_b"

paste0(char1, char2)

## [1] "ab"

Simple types (S3 classes)

Vectors

Vectors are one-dimensional collections of data of the same type. Indexing in R is always 1-based.

#empty vector

a <- vector()
a <- c()

# integer vector

a <- c(1L,2L,3L)
a <- 1:3

typeof(a)

## [1] "integer"

class(a)

## [1] "integer"

#It can handle only one type! It will coerce the data into the most permissive type:
b <- c(1,2,3)

a <- c(1L,"two",3L)
typeof(a)

## [1] "character"

a <- c(1L,2,3L)
typeof(a)

## [1] "double"

#vector functions

length(a)

## [1] 3

a2 <- c(a,a) #combine vectors
a2

## [1] 1 2 3 1 2 3

#compare vectors

a==b

## [1] TRUE TRUE TRUE

a==a

## [1] TRUE TRUE TRUE

#but!
a==a2

## [1] TRUE TRUE TRUE TRUE TRUE TRUE

# Two logical vectors of the same length to compare the element-wise
# operators (&, |) with the scalar operators (&&, ||).
a <- c(TRUE, TRUE, FALSE)
b <- c(TRUE, TRUE, TRUE)

# & works element by element and returns a vector as long as its inputs.
a & b

## [1]  TRUE  TRUE FALSE

# && is meant for single values (e.g. inside if()). In the output below it
# warns and returns a single value for these length-3 inputs.
# NOTE: since R 4.3.0 a length > 1 operand to && or || is an error, not a
# warning, so this line stops with an error on current R versions.
a && b

## Warning in a && b: 'length(x) = 3 > 1' in coercion to 'logical(1)'

## Warning in a && b: 'length(x) = 3 > 1' in coercion to 'logical(1)'

## [1] TRUE

# | is the element-wise OR.
a | b

## [1] TRUE TRUE TRUE

# || is the scalar OR; the same R >= 4.3.0 caveat as for && applies.
a || b

## Warning in a || b: 'length(x) = 3 > 1' in coercion to 'logical(1)'

## [1] TRUE

# Subsetting vectors
# Indexing is 1-based

b <- c(1, 2, 4, NA)
a <- c(1, 2, 3)

# single element by position
a[1]

## [1] 1

# several elements by a vector of positions
b[c(1, 2)]

## [1] 1 2

b[1:2]

## [1] 1 2

# logical subsetting: keep the elements where the condition is TRUE
a[a > 2]

## [1] 3

# NA > 2 is NA, and an NA in a logical index yields NA in the result
b[b > 2]

## [1]  4 NA

# drop missing values explicitly
b[!is.na(b)]

## [1] 1 2 4

# positions (not values) of the non-missing elements
which(!is.na(b))

## [1] 1 2 3

# Named vectors

names(a) <- c("one", "two", "three")

a

##   one   two three
##     1     2     3

# elements can then be selected by name
a["one"]

## one
##   1

paste(a, collapse = ",")

## [1] "1,2,3"

# Other useful functions

sum(a)

## [1] 6

mean(a)

## [1] 2

median(a)

## [1] 2

sd(a)

## [1] 1

# a single NA makes the result NA ...
mean(b)

## [1] NA

# ... unless missing values are removed first. Spell out TRUE: `T` is an
# ordinary variable that can be reassigned, TRUE is a reserved word.
mean(b, na.rm = TRUE)

## [1] 2.333333

Factors

Factors are a data type that is fairly unique to R. They are very useful in some cases but, in my experience, they can be a source of errors if not handled properly. Factors are used for categorical variables: internally they are represented as integers, but each integer has a label (level). The set of levels is fixed.

a <- factor(c("a", "b", "c", "a"), levels = c("a", "b", "c", "d"))
a

## [1] a b c a
## Levels: a b c d

class(a)

## [1] "factor"

typeof(a)

## [1] "integer"

a[2] <- "d"
#but!
a[2] <- "e"

## Warning in `[<-.factor`(`*tmp*`, 2, value = "e"): invalid factor level, NA
## generated

a <- factor(c("1", "2", "3"), levels = c("1", "2", "3", "4"))
b <- factor(c("1", "4", "3"), levels = c("1", "4", "3", "2"))

as.numeric(b)==as.numeric(a)

## [1] TRUE TRUE TRUE

a==b

## [1]  TRUE FALSE  TRUE

sort(a)

## [1] 1 2 3
## Levels: 1 2 3 4

sort(b)

## [1] 1 4 3
## Levels: 1 4 3 2

as.numeric(as.character(b))==as.numeric(as.character(a))

## [1]  TRUE FALSE  TRUE

Date and time

x <- as.Date("1971-01-01")
unclass(x)

## [1] 365

typeof(x)

## [1] "double"

attributes(x)

## $class
## [1] "Date"

current_date <- Sys.time()
current_date

## [1] "2022-05-18 13:43:20 UTC"

format(current_date, "%d %b %Y")

## [1] "18 May 2022"

current_date <- Sys.Date()

format(current_date-1, "%d %b %Y")

## [1] "17 May 2022"

Lists

Lists are similar to vectors, but their elements can have different types and can themselves be vectors or lists.

a <- list(c(1, 2),2,3)
a

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 3

a <- list(c(1:4), "two", "three")
a

## [[1]]
## [1] 1 2 3 4
## 
## [[2]]
## [1] "two"
## 
## [[3]]
## [1] "three"

a <- list(list(c(1, 2),2,3,4), "two", "three")
a

## [[1]]
## [[1]][[1]]
## [1] 1 2
## 
## [[1]][[2]]
## [1] 2
## 
## [[1]][[3]]
## [1] 3
## 
## [[1]][[4]]
## [1] 4
## 
## 
## [[2]]
## [1] "two"
## 
## [[3]]
## [1] "three"

#can have names

b <- list(numbers=c(1:4),letters=c("two", "three"))


#referring to an element/subsetting

a[[1]]

## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 3
## 
## [[4]]
## [1] 4

a[[1]][[1]]

## [1] 1 2

a[[1]][[1]][1]

## [1] 1

b$letters[1]

## [1] "two"

# functions

# number of top-level elements of the list
length(a)

## [1] 3

# `letters` is an element of the named list `b`, not of `a`. Asking for a
# missing name with `$` silently returns NULL (length 0), so a typo in the
# list name goes unnoticed.
length(b$letters)

## [1] 2

Matrix

A matrix stores values in a two-dimensional layout (rows and columns). The elements are of a primitive type, and all elements have to be of the same type.

letter_mat <- matrix(data=c(rep("a", 3), rep("b", 3)), nrow = 3)

num_mat <- matrix(1:9, nrow=3, ncol=3)

# a matrix can have both row names and column names (see rownames(), colnames(), dimnames())

num_mat*num_mat #element-wise multiplication

##      [,1] [,2] [,3]
## [1,]    1   16   49
## [2,]    4   25   64
## [3,]    9   36   81

num_mat %*% num_mat #matrix multiplication

##      [,1] [,2] [,3]
## [1,]   30   66  102
## [2,]   36   81  126
## [3,]   42   96  150

t(num_mat)  #Transpose

##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    4    5    6
## [3,]    7    8    9

# many other matrix-specific functions and operators

#subsetting

letter_mat[1,2]

## [1] "b"

letter_mat[1,]

## [1] "a" "b"

letter_mat[1:2]

## [1] "a" "a"

letter_mat[3]

## [1] "a"

letter_mat[,1]

## [1] "a" "a" "a"

#combining
cbind(letter_mat, num_mat)

##      [,1] [,2] [,3] [,4] [,5]
## [1,] "a"  "b"  "1"  "4"  "7" 
## [2,] "a"  "b"  "2"  "5"  "8" 
## [3,] "a"  "b"  "3"  "6"  "9"

rbind(letter_mat, letter_mat)

##      [,1] [,2]
## [1,] "a"  "b" 
## [2,] "a"  "b" 
## [3,] "a"  "b" 
## [4,] "a"  "b" 
## [5,] "a"  "b" 
## [6,] "a"  "b"

#row and column-wise operations
#matrixStats package - fast!


num_mat <- matrix(runif(100), nrow=10, ncol=10)

rowMeans(num_mat)

##  [1] 0.3851638 0.6427088 0.6609717 0.3203019 0.4464088 0.5554319 0.5290633
##  [8] 0.4225664 0.2569312 0.4657085

matrixStats::rowMeans2(num_mat)

##  [1] 0.3851638 0.6427088 0.6609717 0.3203019 0.4464088 0.5554319 0.5290633
##  [8] 0.4225664 0.2569312 0.4657085

rowSums(num_mat)

##  [1] 3.851638 6.427088 6.609717 3.203019 4.464088 5.554319 5.290633 4.225664
##  [9] 2.569312 4.657085

colMeans(num_mat)

##  [1] 0.3228825 0.4540523 0.4353427 0.5532767 0.5179125 0.5816021 0.5123537
##  [8] 0.2864224 0.5966993 0.4247122

colSums(num_mat)

##  [1] 3.228825 4.540523 4.353427 5.532767 5.179125 5.816021 5.123537 2.864224
##  [9] 5.966993 4.247122

Data frame

2D object type. Rows and columns with names. The columns can have different element types.

# Build a three-patient table with one numeric and two factor columns.
# "First name" is not a syntactic R name, so data.frame() stores the column
# as `First.name`.
df <- data.frame(
  value = c(1, 2, 3),
  "First name" = factor(c("John", "John", "Peter")),
  last_name = factor(c("Smith", "Black", "Rabbit")),
  row.names = c("Patient 1", "Patient 2", "Patient 3")
)


df[1:2,]

##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black

df[c(TRUE, TRUE, FALSE),]

##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black

df[c("Patient 1", "Patient 2"),]

##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black

df[-3,]

##           value First.name last_name
## Patient 1     1       John     Smith
## Patient 2     2       John     Black