#
#
####### Central Limit Theorem simulations #######
#
#
# According to Howell (2007):
# "The central limit theorem is a factual statement about the distribution of means. In an extended form,
# it states,
# Given a population with mean 'mu' and variance 'sigma^2', the sampling distribution of the mean (the
# distribution of sample means) will have a mean equal to 'mu', a variance equal to 'sigma^2'/n, and
# a standard deviation equal to 'sigma'/sqrt(n). The distribution will approach the normal distribution
# as n, the sample size, increases" (p. 170).
#
#
################## Binomial Simulation ##################
#
# As the sample size increases, the proportion of heads (p.heads) converges on 0.50
# for a fair coin (i.e. heads = .50).
#
# Binomial simulation: repeatedly flip a (possibly biased) coin with a growing
# number of flips per trial, and animate (top) the proportion of heads against
# the number of flips and (bottom) the histogram of all proportions so far.
#
# Setting the initial conditions.
coin <- c(1, 0) # 1 = heads, 0 = tails
heads <- .50 # Probability of heads on a single flip (fair coin: .50)
# Starting 'sample size'. It is incremented at the top of every trial, so the
# first trial actually uses flips + 1 flips.
flips <- 10
trials <- 100 # Iterations
speed <- .25 # Number of seconds to pause between trials
# Truly empty result vectors; element i is filled in by trial i.
p.heads <- numeric(0) # Proportion of heads for each trial
s.add <- numeric(0) # Number of flips for each trial (increases with each trial)
# Display two graphs in one window (2 rows, 1 column) and remember the
# previous layout so it can be restored once the animation is finished.
old.par <- par(mfrow = c(2, 1))
# Simulation. seq_len() (rather than 1:trials) makes trials = 0 a no-op.
for (i in seq_len(trials)) {
  flips <- round(flips + 1)
  # prob lines up with coin: P(1) = heads, P(0) = 1 - heads.
  tosses <- sample(coin, flips, replace = TRUE, prob = c(heads, 1 - heads))
  p.heads[i] <- sum(tosses) / flips
  s.add[i] <- flips
  # Top panel: proportion of heads against the number of flips.
  plot(s.add, p.heads, ylim = c(0, heads + .5))
  if (i > 2) {
    # Bottom panel: histogram plus kernel density of the proportions so far.
    hist(p.heads, main = "Proportion of Heads", xlim = c(0, heads + .5),
         col = "lightblue1", probability = TRUE)
    lines(density(p.heads), col = "blue")
  } else {
    # Too few proportions for a density estimate yet. Fill the bottom panel
    # with an empty frame so the next scatter plot stays in the top panel.
    plot.new()
  }
  Sys.sleep(speed)
}
par(old.par) # Restore the layout that was active before this section.
################## Gaussian (normal distribution) Simulation ##################
#
# As sample size increases, each sample will more closely resemble a normal distribution and as the
# "distribution of sample means" grows (with each sample), it will center on the population mean (mu).
#
# Gaussian simulation: draw ever-larger samples from a normal population and
# animate (top) the histogram of the current sample and (bottom) the histogram
# of all sample means collected so far.
#
# The population: 1000000 individuals with a mean (mu) of 100 and a standard
# deviation (sigma) of 15.
pop <- rnorm(1000000, mean = 100, sd = 15)
# Show the population on its own; remember the previous layout so it can be
# restored once the animation is finished.
old.par <- par(mfrow = c(1, 1))
hist(pop, col = "lightblue1", xlim = c(40, 160), probability = TRUE)
lines(density(pop), col = "blue")
# Setting the initial conditions.
# Starting sample size. It is incremented at the top of every loop, so the
# first sample actually holds n + 1 = 5 observations.
n <- 4
draws <- 1000 # The number of samples to draw from the population (i.e. loops or iterations).
speed <- .25 # Number of seconds to pause between draws.
sample.means <- numeric(0) # Truly empty vector; element i is the mean of sample i.
par(mfrow = c(2, 1)) # Display two graphs in one window, 2 rows on 1 column.
# Simulation. seq_len() (rather than 1:draws) makes draws = 0 a no-op.
for (i in seq_len(draws)) {
  n <- n + 1
  # Named current.sample so the variable does not mask base::sample().
  current.sample <- sample(pop, n, replace = FALSE)
  sample.means[i] <- mean(current.sample)
  # Top panel: the sample just drawn, with its kernel density.
  hist(current.sample, col = "lightblue1", xlim = c(40, 160), xlab = " ",
       probability = TRUE, main = "Histogram of each Sample")
  lines(density(current.sample), col = "blue")
  # Bottom panel: frequency histogram of every sample mean so far.
  hist(sample.means, col = "lightgreen", xlim = c(85, 115), xlab = " ",
       main = "Distribution of Sample Means")
  Sys.sleep(speed)
}
par(old.par) # Restore the layout that was active before this section.
################## Exponential (NON-normal) Distribution ##################
#
# Even with a NON-normally distributed population, as the "distribution of sample means"
# grows (with each sample), it will become symmetrical and centered
# on the population mean.
#
# Exponential simulation: draw ever-larger samples from a skewed (exponential)
# population and animate (top) the histogram of the current sample and
# (bottom) the histogram of all sample means collected so far.
#
# The population: 1000000 individuals from an exponential distribution with
# rate 1/20, i.e. a mean (mu) of approximately 20.
pop <- rexp(1000000, rate = 1/20)
# print() so the population mean is also shown when the file is source()d.
print(mean(pop))
# Show the population on its own; remember the previous layout so it can be
# restored once the animation is finished.
old.par <- par(mfrow = c(1, 1))
hist(pop, col = "lightblue1", xlim = c(0, 160), probability = TRUE)
lines(density(pop), col = "blue")
# Setting the initial conditions.
# Starting sample size. It is incremented at the top of every loop, so the
# first sample actually holds n + 1 = 5 observations.
n <- 4
draws <- 1000 # The number of samples to draw from the population (i.e. loops or iterations).
speed <- .25 # Number of seconds to pause between draws.
sample.means <- numeric(0) # Truly empty vector; element i is the mean of sample i.
par(mfrow = c(2, 1)) # Display two graphs in one window, 2 rows on 1 column.
# Simulation. seq_len() (rather than 1:draws) makes draws = 0 a no-op.
for (i in seq_len(draws)) {
  n <- n + 1
  # Named current.sample so the variable does not mask base::sample().
  current.sample <- sample(pop, n, replace = FALSE)
  sample.means[i] <- mean(current.sample)
  # Top panel: the sample just drawn, with its kernel density.
  hist(current.sample, col = "lightblue1", xlim = c(0, 160), xlab = " ",
       probability = TRUE, main = "Histogram of each Sample")
  lines(density(current.sample), col = "blue")
  # Bottom panel: frequency histogram of every sample mean so far.
  hist(sample.means, col = "lightgreen", xlim = c(0, 40), xlab = " ",
       main = "Distribution of Sample Means")
  Sys.sleep(speed)
}
par(old.par) # Restore the layout that was active before this section.
################################################################################
# Exponential (NON-normal) distribution, fixed sample size.
#
# Even with a static sample size, the distribution of sample means grows to be
# nearly normal as more samples are accumulated. Each loop draws one sample of
# n observations and animates (top) the histogram of that sample and (bottom)
# the histogram of all sample means collected so far.
#
# The population: 1000000 individuals from an exponential distribution with
# rate 1/20, i.e. a mean (mu) of approximately 20.
pop <- rexp(1000000, rate = 1/20)
# print() so the population mean is also shown when the file is source()d.
print(mean(pop))
# Show the population on its own; remember the previous layout so it can be
# restored once the animation is finished.
old.par <- par(mfrow = c(1, 1))
hist(pop, col = "lightblue1", xlim = c(0, 160), probability = TRUE)
lines(density(pop), col = "blue")
# Setting the initial conditions.
n <- 50 # Sample size (the same in each iteration/sample).
draws <- 1000 # The number of samples to draw from the population (i.e. loops or iterations).
speed <- .25 # Number of seconds to pause between draws.
sample.means <- numeric(0) # Truly empty vector; element i is the mean of sample i.
par(mfrow = c(2, 1)) # Display two graphs in one window, 2 rows on 1 column.
# Simulation. seq_len() (rather than 1:draws) makes draws = 0 a no-op.
for (i in seq_len(draws)) {
  # Named current.sample so the variable does not mask base::sample().
  current.sample <- sample(pop, n, replace = FALSE)
  sample.means[i] <- mean(current.sample)
  # Top panel: the sample just drawn, with its kernel density.
  hist(current.sample, col = "lightblue1", xlim = c(0, 160), xlab = " ",
       probability = TRUE, main = "Histogram of each Sample")
  lines(density(current.sample), col = "blue")
  # Bottom panel: frequency histogram of every sample mean so far.
  hist(sample.means, col = "lightgreen", xlim = c(0, 40), xlab = " ",
       main = "Distribution of Sample Means")
  Sys.sleep(speed)
}
par(old.par) # Restore the layout that was active before this section.
################## Reference ##################
#
# Howell, D. C. (2007). Statistical Methods for Psychology (6th ed.). Belmont, CA: Thomson Wadsworth.
#
# Feb. 2011