1 - Recap of R Basics

Author

Peter Nutter

Published

Sunday, April 14, 2024

# Generate a sequence of numbers from 1 to 20 with a step of 2
print(seq(1, 20, 2))
 [1]  1  3  5  7  9 11 13 15 17 19
# Create a sequence of numbers from 1 to 20
1:20
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
# Create a vector of numbers
x = c(1, 2, 3, 4, 5)
x
[1] 1 2 3 4 5
# Calculate the square root of 4
sqrt(4)
[1] 2
# Generate 5 random numbers from a uniform distribution
runif(5)
[1] 0.78997690 0.52227519 0.67701569 0.28452173 0.07227832
# Calculate the quantile function (inverse of CDF) for the normal distribution at 0.95
qnorm(0.95)
[1] 1.644854
# Get help on the pt function (t-distribution cumulative distribution function)
head(?pt)
[1] "/Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library/stats/help/TDist"
# Search for help on the term "pt"
# help.search("pt")
# Calculate the cumulative probability for the t-distribution with 10 degrees of freedom at 1.96
pt(1.96, df = 10)
[1] 0.9607819
# Calculate the gamma density with shape = 1 and rate = 1
dgamma(1, shape = 1, rate = 1)
[1] 0.3678794
# Calculate the ranks of the numbers in the vector
rank(c(1, 3, 2, 4, 5, 5, 5))
[1] 1 3 2 4 6 6 6
# Sort the numbers in decreasing order
sort(c(5, 3, 2, 1, 4), decreasing = TRUE)
[1] 5 4 3 2 1
# Generate 50 random numbers from a standard normal distribution
x = rnorm(50)
# Calculate the standard deviation of the generated numbers
x_sd = sd(x)
x_sd
[1] 0.9701469
# Calculate the correlation between x and -x
correlation = cor(x, -x)
correlation
[1] -1
# Create a table showing the number of positive and non-positive numbers in x
table(x > 0)

FALSE  TRUE 
   25    25 
# Handle missing values in a vector
z = c(1, 2, NA, 4, 5)
z
[1]  1  2 NA  4  5
# Check for missing values in z
is.na(z)
[1] FALSE FALSE  TRUE FALSE FALSE
# Remove missing values from z
z[!is.na(z)]
[1] 1 2 4 5
# Check if the density of the normal distribution at 0 equals 1/sqrt(2*pi).
# Doubles are compared with a numerical tolerance via all.equal() rather than
# exact `==`, which can report FALSE because of floating-point rounding.
isTRUE(all.equal(dnorm(0), 1 / sqrt(2 * pi)))
[1] TRUE
# Check if the CDF of the normal distribution at 0 equals 0.5.
# Use a tolerance-based comparison (all.equal) instead of exact `==`,
# which is unreliable for floating-point results.
isTRUE(all.equal(pnorm(0), 0.5))
[1] TRUE
# Check if the quantile function of the normal distribution at 0 equals -Inf
qnorm(0) == -Inf
[1] TRUE
# Calculate the density of the normal distribution at 0 with mean = 2 and sd = 5
means = 2
sdev = 5
dnorm(0, mean = means, sd = sdev)
[1] 0.07365403
# Create a 3x2 matrix filled by column with numbers 1 to 6
mat = matrix(1:6, nrow = 3, ncol = 2)
mat
     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6
# Create a 3x2 matrix filled by row with numbers 1 to 6
mat = matrix(1:6, 3, byrow = TRUE)
mat
     [,1] [,2]
[1,]    1    2
[2,]    3    4
[3,]    5    6
# Create a numeric vector of length 4
numeric(4)
[1] 0 0 0 0
# Create an integer vector of length 4
integer(4)
[1] 0 0 0 0
# Repeat the number 1 four times
rep(1, 4)
[1] 1 1 1 1
# Create a 2x3 matrix filled with zeros
mat = matrix(0, 2, 3)
mat
     [,1] [,2] [,3]
[1,]    0    0    0
[2,]    0    0    0
# Access the first column of the matrix
mat[, 1]
[1] 0 0
# Access the first row of the matrix
mat[1, ]
[1] 0 0 0
# Access the element in the first row and first column
mat[1, 1]
[1] 0
# Multiply the matrix by its transpose (a 2x3 matrix cannot be multiplied by itself; mat %*% t(mat) gives a 2x2 result)
mat_sqvr = mat %*% t(mat)
mat_sqvr
     [,1] [,2]
[1,]    0    0
[2,]    0    0
# Multiply two vectors element-wise
rep(1, 4) * rep(3, 4)
[1] 3 3 3 3
# Create a 3x3 matrix filled by column with numbers 1 to 9
mat = matrix(1:9, 3, 3)
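# Replace it with a non-singular (invertible) 2x2 matrix, because the 3x3 matrix above is singular and cannot be inverted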
mat = matrix(c(1, 2, 3, 4), 2, 2)
# Calculate the inverse of the matrix
solve(mat)
     [,1] [,2]
[1,]   -2  1.5
[2,]    1 -0.5
# Extract the diagonal elements of the matrix
diag(mat)
[1] 1 4
# Create a scatter plot
plot(1:10, seq(1, 20, 2))

# Create a line plot with customizations
plot(1:10, seq(1, 20, 2), type = "l", col = "blue", lwd = 2, main = "Line Plot", xlab = "X-axis", ylab = "Y-axis")
abline(h = 10, col = "red", lwd = 6)

# Add a curve to the existing plot
# curve(x^2, from = -10, to = 10, col = "green", lwd = 2, lty = 1, add = TRUE)
# Create a histogram of 1000 random normal numbers
hist(rnorm(1000), col = "blue", main = "Histogram of 1000 random normal numbers", xlab = "Value", ylab = "Frequency")

# Create a barplot of numbers from 1 to 10
barplot(1:10, col = "red", main = "Barplot of 1:10", xlab = "Index", ylab = "Value")

# Create an empirical cumulative distribution function plot
plot.ecdf(rnorm(1000), col = "blue", main = "ECDF of 1000 random normal numbers", xlab = "Value", ylab = "Cumulative Probability")

# Get help on the qqplot function
?qqplot
# Create a Q-Q plot comparing x and x^2
x = rnorm(1000)
qqplot(x, x^2)

# Create a Q-Q normal plot
qqnorm(x)

# Get help on the qqline function
?qqline
# Add a Q-Q line to the Q-Q normal plot
# qqline(x, distribution = qnorm)
# Create a boxplot of 1000 random normal numbers.
# A vertical boxplot draws the sample values along the y-axis, so that axis is
# labelled "Value"; a boxplot has no frequency axis, and the x-axis carries no
# scale for a single sample, so it is left unlabelled.
boxplot(
  rnorm(1000),
  col = "red",
  main = "Boxplot of 1000 random normal numbers",
  ylab = "Value"
)

# Create a sample data vector
data <- c(3.1, 4.2, 4.3, 5.4, 5.5, 5.6, 6.7, 7.8, 8.9, 9.0)
# Create a stem-and-leaf plot of the sample data
stem(data)

  The decimal point is at the |

  2 | 1
  4 | 23456
  6 | 78
  8 | 90