Primitive types
Numeric and integer
x <- 1
y <- 2L
cat("R is non-declarative. It means we don't have to declare the type of variables before creating them. \n It also means that the class (type) of the variables will be assigned automatically!")
R is non-declarative. It means we don’t have to declare the type of variables before creating them. It also means that the class (type) of the variables will be assigned automatically!
class(x)
[1] "numeric"
class(y)
[1] "integer"
cat("Change the class of a variable - coercing \n")
Change the class of a variable - coercing
class(as.integer(x))
[1] "integer"
z <- 1.314
v <- 1.678
v2 <- 2.1
class(z)
[1] "numeric"
print(as.integer(z))
[1] 1
print(as.integer(v))
[1] 1
print(as.integer(v2))
[1] 2
Character
a <- "a"
a1 <- 'b'
b <- "The red fox"
class(a)
## [1] "character"
class(a1)
## [1] "character"
class(b)
## [1] "character"
#"Careful!"
c <- "1"
class(c)
## [1] "character"
Logical
a <- TRUE
b <- FALSE
print(a)
## [1] TRUE
class(a)
## [1] "logical"
#BUT!
c <- "TRUE"
class(c)
## [1] "character"
print(as.integer(a))
## [1] 1
print(as.integer(b))
## [1] 0
a <- as.logical(0L)
b <- as.logical(1)
print(a)
## [1] FALSE
print(b)
## [1] TRUE
Operators
Arithmetic operators
Operator | Description |
---|---|
+ | Addition |
- | Subtraction |
* | Multiplication |
/ | Division |
^ | Exponent |
%% | Modulus (Remainder from division) |
%/% | Integer Division |
Operations with primitive types
num1 <- 15.12
num2 <- 6.54
num3 <- 1
char1 <- "a"
char2 <- "b"
char3 <- "1"
logic1 <- TRUE
logic2 <- FALSE
num1+num2
## [1] 21.66
num1-num2
## [1] 8.58
num1*num2
## [1] 98.8848
num1/num2
## [1] 2.311927
num2^2
## [1] 42.7716
num1%%num2
## [1] 2.04
num1%/%num2
## [1] 2
try(char1+num1)
## Error in char1 + num1 : non-numeric argument to binary operator
try(char3+num1)
## Error in char3 + num1 : non-numeric argument to binary operator
try(as.numeric(char1)+num1)
## Warning in doTryCatch(return(expr), name, parentenv, handler): NAs introduced by
## coercion
## [1] NA
try(as.numeric(char3)+num1)
## [1] 16.12
try(logic1+logic2)
## [1] 1
try(num1+logic1)
## [1] 16.12
num1<num2
## [1] FALSE
num1!=num2
## [1] TRUE
num1==num2
## [1] FALSE
try(char1<char2)
## [1] TRUE
# alphabetical order
try(char1==char2)
## [1] FALSE
try(char1!=char2)
## [1] TRUE
#But!
try(char1>num2)
## [1] TRUE
try(char1<=num3)
## [1] FALSE
try(char3<=num3)
## [1] TRUE
logic1<logic2
## [1] FALSE
logic1!=logic2
## [1] TRUE
logic1==logic2
## [1] FALSE
#Logical operators
TRUE & FALSE
## [1] FALSE
TRUE | FALSE
## [1] TRUE
!TRUE
## [1] FALSE
#Floating-point arithmetic is inexact, so comparing doubles with == is unreliable (the result may depend on your computer)
x1 <- 0.5 - 0.3
x2 <- 0.3 - 0.1
x1 == x2
## [1] FALSE
# should use all.equal instead
all.equal(x1, x2)
## [1] TRUE
#concatenate characters
paste(char1, char2)
## [1] "a b"
paste(char1, char2, sep="_")
## [1] "a_b"
paste0(char1, char2)
## [1] "ab"
Simple types (S3 classes)
Vectors
Vectors are one-dimensional collections of data of the same type. Indexing in R is always 1-based.
## [1] "integer"
class(a)
## [1] "integer"
#It can handle only one type! It will coerce the data into the most permissive type:
b <- c(1,2,3)
a <- c(1L,"two",3L)
typeof(a)
## [1] "character"
## [1] "double"
#vector functions
length(a)
## [1] 3
a2 <- c(a,a) #combine vectors
a2
## [1] 1 2 3 1 2 3
#compare vectors
a==b
## [1] TRUE TRUE TRUE
a==a
## [1] TRUE TRUE TRUE
#but!
a==a2
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
## [1] TRUE TRUE FALSE
a && b
## Warning in a && b: 'length(x) = 3 > 1' in coercion to 'logical(1)'
## Warning in a && b: 'length(x) = 3 > 1' in coercion to 'logical(1)'
## [1] TRUE
a | b
## [1] TRUE TRUE TRUE
a || b
## Warning in a || b: 'length(x) = 3 > 1' in coercion to 'logical(1)'
## [1] TRUE
## [1] 1
b[c(1,2)]
## [1] 1 2
b[1:2]
## [1] 1 2
a[a>2]
## [1] 3
b[b>2]
## [1] 4 NA
b[!is.na(b)]
## [1] 1 2 4
## [1] 1 2 3
## one two three
## 1 2 3
a["one"]
## one
## 1
paste(a, collapse = ",")
## [1] "1,2,3"
#other useful functions
sum(a)
## [1] 6
mean(a)
## [1] 2
median(a)
## [1] 2
sd(a)
## [1] 1
mean(b)
## [1] NA
mean(b, na.rm = T)
## [1] 2.333333
Factors
Factors are a rather unique type in R. They are very useful in some cases, but in my experience they can be a source of errors if not handled properly. Factors are used for categorical variables. Internally they are represented as integers, but they carry labels (levels). The set of labels is fixed.
## [1] a b c a
## Levels: a b c d
class(a)
## [1] "factor"
typeof(a)
## [1] "integer"
a[2] <- "d"
#but!
a[2] <- "e"
## Warning in `[<-.factor`(`*tmp*`, 2, value = "e"): invalid factor level, NA
## generated
a <- factor(c("1", "2", "3"), levels = c("1", "2", "3", "4"))
b <- factor(c("1", "4", "3"), levels = c("1", "4", "3", "2"))
as.numeric(b)==as.numeric(a)
## [1] TRUE TRUE TRUE
a==b
## [1] TRUE FALSE TRUE
sort(a)
## [1] 1 2 3
## Levels: 1 2 3 4
sort(b)
## [1] 1 4 3
## Levels: 1 4 3 2
as.numeric(as.character(b))==as.numeric(as.character(a))
## [1] TRUE FALSE TRUE
Date and time
## [1] 365
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
current_date <- Sys.time()
current_date
## [1] "2022-05-18 13:43:20 UTC"
format(current_date, "%d %b %Y")
## [1] "18 May 2022"
## [1] "17 May 2022"
Lists
Lists are similar to vectors, but they can take lists as elements. The elements can have different types.
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
## [[1]]
## [1] 1 2 3 4
##
## [[2]]
## [1] "two"
##
## [[3]]
## [1] "three"
## [[1]]
## [[1]][[1]]
## [1] 1 2
##
## [[1]][[2]]
## [1] 2
##
## [[1]][[3]]
## [1] 3
##
## [[1]][[4]]
## [1] 4
##
##
## [[2]]
## [1] "two"
##
## [[3]]
## [1] "three"
#can have names
b <- list(numbers=c(1:4),letters=c("two", "three"))
#referring to an element/subsetting
a[[1]]
## [[1]]
## [1] 1 2
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
##
## [[4]]
## [1] 4
a[[1]][[1]]
## [1] 1 2
a[[1]][[1]][1]
## [1] 1
b$letters[1]
## [1] "two"
#functions
length(a)
## [1] 3
length(a$letters)
## [1] 0
Matrix
A data format for storing values in a matrix (rows and columns). The elements can be primitive types. All elements have to be of the same type.
letter_mat <- matrix(data=c(rep("a", 3), rep("b", 3)), nrow = 3)
num_mat <- matrix(1:9, nrow=3, ncol=3)
# a matrix can have both row names and column names (see rownames() and colnames())
num_mat*num_mat #element-wise multiplication
## [,1] [,2] [,3]
## [1,] 1 16 49
## [2,] 4 25 64
## [3,] 9 36 81
num_mat %*% num_mat #matrix multiplication
## [,1] [,2] [,3]
## [1,] 30 66 102
## [2,] 36 81 126
## [3,] 42 96 150
t(num_mat) #Transpose
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
# many other matrix-specific functions and operators
#subsetting
letter_mat[1,2]
## [1] "b"
letter_mat[1,]
## [1] "a" "b"
letter_mat[1:2]
## [1] "a" "a"
letter_mat[3]
## [1] "a"
letter_mat[,1]
## [1] "a" "a" "a"
#combining
cbind(letter_mat, num_mat)
## [,1] [,2] [,3] [,4] [,5]
## [1,] "a" "b" "1" "4" "7"
## [2,] "a" "b" "2" "5" "8"
## [3,] "a" "b" "3" "6" "9"
rbind(letter_mat, letter_mat)
## [,1] [,2]
## [1,] "a" "b"
## [2,] "a" "b"
## [3,] "a" "b"
## [4,] "a" "b"
## [5,] "a" "b"
## [6,] "a" "b"
#row and column-wise operations
#matrixStats package - fast!
num_mat <- matrix(runif(100), nrow=10, ncol=10)
rowMeans(num_mat)
## [1] 0.3851638 0.6427088 0.6609717 0.3203019 0.4464088 0.5554319 0.5290633
## [8] 0.4225664 0.2569312 0.4657085
matrixStats::rowMeans2(num_mat)
## [1] 0.3851638 0.6427088 0.6609717 0.3203019 0.4464088 0.5554319 0.5290633
## [8] 0.4225664 0.2569312 0.4657085
rowSums(num_mat)
## [1] 3.851638 6.427088 6.609717 3.203019 4.464088 5.554319 5.290633 4.225664
## [9] 2.569312 4.657085
colMeans(num_mat)
## [1] 0.3228825 0.4540523 0.4353427 0.5532767 0.5179125 0.5816021 0.5123537
## [8] 0.2864224 0.5966993 0.4247122
colSums(num_mat)
## [1] 3.228825 4.540523 4.353427 5.532767 5.179125 5.816021 5.123537 2.864224
## [9] 5.966993 4.247122
Data frame
2D object type. Rows and columns with names. The columns can have different element types.
df <- data.frame(value=c(1,2,3), "First name"=as.factor(c("John", "John", "Peter")), last_name=as.factor(c("Smith", "Black", "Rabbit")), row.names = c("Patient 1", "Patient 2", "Patient 3"))
df[1:2,]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
df[c(TRUE, TRUE, FALSE),]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
df[c("Patient 1", "Patient 2"),]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black
df[-3,]
## value First.name last_name
## Patient 1 1 John Smith
## Patient 2 2 John Black