# C Simulation and visualization
# Reading materials (optional): Chapters 3, 5, and 7 of "R for Data Science" by Garrett Grolemund and Hadley Wickham.
# C.1 Simulation and visualization: univariate
## Random seed
# Fixing the seed makes the pseudo-random draws that follow repeatable.
set.seed(1) # set random number generator seed for reproducibility
runif(1)
runif(1)
# A different seed starts the generator from a different state.
set.seed(2)
runif(1)
# Resetting the seed to 1 restarts the stream, so the next two draws repeat
# the two draws made right after the first set.seed(1) call.
set.seed(1)
runif(1)
runif(1)
## Uniform distribution
### Draw ten uniform random variables:
runif(n = 10)
# See ?runif for the other available arguments.
# To understand the distribution, plot its true density over a grid on [0, 1].
x.grid <- seq(0, 1, length.out = 100)
unif.pdf <- dunif(x.grid, min = 0, max = 1)
plot(x.grid, unif.pdf, type = "l")
# Draw a sample of 100 uniforms and estimate its density from the sample.
unif.rv.100 <- runif(n = 100)
unif.pdf.est <- density(unif.rv.100)
# Compare the true density (blue) with a histogram of the sample ...
hist(unif.rv.100, freq = FALSE, main = "Uniform", xlab = "X")
lines(x.grid, unif.pdf, col = "blue", lwd = 3)
# ... and with the smooth density estimate based on the sample (red).
lines(unif.pdf.est, col = "red", lwd = 3)
# Wrap up the above plotting code as a function.
#
# Draws a density-scaled histogram of a sample and overlays the true density
# (blue) and a kernel density estimate computed from the sample (red).
#
# Arguments:
#   rvs      numeric vector of sampled values.
#   pdf.true true density values evaluated at the points in pdf.grid.
#   pdf.grid grid of x values at which pdf.true was evaluated.
#   main     plot title, passed on to hist().
# Returns NULL invisibly (the value of the final lines() call); the function is
# called for its plotting side effect.
plot.density.empirical <- function(rvs, pdf.true, pdf.grid, main) {
  # Compute the histogram and the kernel estimate up front so the y-axis can be
  # sized to fit all three layers. hist() alone sizes the axis to the bars only,
  # which clips any overlaid curve that rises above the tallest bar.
  hist.info <- hist(rvs, plot = FALSE)
  pdf.est <- density(rvs)
  heights <- c(hist.info$density, pdf.true, pdf.est$y)
  # Ignore NA/Inf heights (e.g., a density that is unbounded at an endpoint).
  y.top <- max(heights[is.finite(heights)])

  hist(rvs, freq = FALSE, main = main, xlab = "X", ylim = c(0, y.top))
  lines(x = pdf.grid, y = pdf.true, col = "blue", lwd = 3)
  # Smooth density estimate based on the samples, for comparison.
  lines(pdf.est, col = "red", lwd = 3)
}
plot.density.empirical(
  rvs = unif.rv.100,
  pdf.true = unif.pdf,
  pdf.grid = x.grid,
  main = "Uniform"
)
### Normal
### Draw ten normal random variables with mean 0 and variance 2
### (rnorm() takes the standard deviation, hence sd = sqrt(2))
rnorm(n = 10, mean = 0, sd = sqrt(2))
### There are many other distributions in R
### try ?rchisq, ?rf, ?rt, ?rbeta, ?rpois, ...
## Other distributions
### Poisson ?rpois
### Exponential ?rexp
### t ?rt
### Chisq ?rchisq
### F ?rf
## Sampling from a user-specified vector
### Draw samples from any vector using sample()
wrd <- c("yet", "a", "new", "sentence")
sample(x = wrd, size = 2)
# See ?sample for the other options.
### Visualize the simulated data
y <- rnorm(n = 100)
# Start with a basic histogram; see ?hist for ways to modify the plot.
hist(y)
# Then summarize the same sample with a boxplot:
boxplot(y)
# We can also visualize the data using ggplot2
library(ggplot2)
dat <- data.frame(y = y)
ggplot(data = dat, mapping = aes(x = 1, y = y)) +
  geom_violin(trim = FALSE)
# Read more about violin plots in this post:
# http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization
# C.2 Simulation and visualization: multivariate
# Many independent univariate random variables can be generated with the code
# from the previous section. Here we generate dependent random variables instead.
n <- 50 # sample size
x1 <- rnorm(n)
x2 <- 2 * x1 + 2 * runif(n)
x3 <- x1 * x2 * rpois(n, lambda = 3)
# A random variable can also be generated from its probability density function
# (e.g., using importance sampling); the simple data-generating method above is
# sufficient for this class.
# Collect the three random variables in one data.frame for ease of plotting.
dat.multi <- data.frame(x1 = x1, x2 = x2, x3 = x3)
### Visualization
### Pairwise scatter plot (base R)
pairs(x = dat.multi, pch = ".", cex = 4)
### Visualization in ggplot2 (via the GGally package)
library(GGally)
ggpairs(data = dat.multi)
### For the pair x1 and x3
# A more informative scatterplot, using base R.
# The formula interface is used because it is the plot() method that accepts
# `data =`; passing `data =` together with x/y vectors goes to plot.default(),
# which warns that "data" is not a graphical parameter.
plot(x3 ~ x1, data = dat.multi, pch = 16, cex = 2)
# Overlay a lowess smooth of x3 against x1.
lines(lowess(dat.multi$x1, dat.multi$x3), col = 2, lwd = 3)
# Or using ggplot2: the x/y mapping is declared once and shared by both layers
ggplot(data = dat.multi, mapping = aes(x = x1, y = x3)) +
  geom_point() +
  geom_smooth()