## Demonstration of central limit theorem ## Based on code in an anonymous comment to the blog post at \url{https://sas-and-r.blogspot.com/2012/01/example-919-demonstrating-central-limit.html} ## Libraries #### library(nortest) library(dplyr) library(ggplot2) ## Data used #### # right-triangle distribution (peak at 0; minimum at 1) data_vec <- rbeta(10000, shape1 = 1, shape2 = 2); ## Function #### #' @title Multiple comparisons of a given data vector to the normal distribution #' @param data_vec Data to sample for calculating means #' @param sample_size Sample size to use for calculating means #' @return A number representing the proportion of p-values < 0.05 #' @description Given a data vector and a sample size, computes 200 means #' based on \code{sample_size}, then compares the means to the normal distribution #' on the hypothesis H0: data_vec is normal using \code{sf.test()} from #' \code{library(nortest)}. Finally, computes and returns the fraction of p-values #' that reject the null hypothesis. Given a non-normal distribution, the Central #' Limit Theorem tells us that this proportion #' should decrease as \code{sample_size} increases. clt <- function(data_vec, sample_size) { means<-array(0, 200) pvals <- array(0, 200) for(i in 1:200) { for(j in i:200) { means[j] <- mean(sample(data, sample_size)) } pvals[i] <- sf.test(means)$p.val } length(pvals[pvals<0.05])/200 } ## Run \code{clt()} multiple times for a range of sample sizes. #### sample_size <- c(rep(2, times = 100), rep(10, times = 100), rep(100, times = 100), rep(1000, times = 100)) prop_pvalue <- data_frame(size = sample_size, pvalue = rep(NA, length(sample_size))) for(x in 1:length(sample_size)) { prop_pvalue[x, 2] <- clt(data = data_vec, sample_size = sample_size[x]) } ## Graph results #### prop_pvalue %>% mutate(size = as.factor(size)) %>% ggplot(aes(x = size, y = pvalue)) + geom_boxplot() + coord_flip() + labs(title = "Central Limit Theorem", x = expression(paste("Sample size, ", italic("n"))), y = "Proportion of non-normal distributions") + theme_minimal()