### CLT FOR DISCRETE RANDOM VARIABLES ###

# Suppose X1, X2, X3, ... ~ Bern(p), independently. (So Sn ~ Binom(n, p).)
bernoulliSnHistogram <- function(n, p) {
  # These first three lines are particular to Bernoulli.
  mu <- p
  sigmaSquared <- p - p^2
  samples <- rbinom(n=10000, size=n, prob=p)
  title <- paste0(
    "sum of ", as.character(n), " IID Bern(", as.character(p), ")s")
  hist(samples, main=title)
}
bernoulliSnHistogram(n=10, p=0.1)
bernoulliSnHistogram(n=100, p=0.1)
bernoulliSnHistogram(n=1000, p=0.1)
bernoulliSnHistogram(n=10000, p=0.1)

# Suppose X1, X2, X3, ... ~ Pois(lambda), independently.
poissonSnHistogram <- function(n, lambda) {
  mu <- lambda
  sigmaSquared <- lambda
  samples <- replicate(10000, sum(rpois(n=n, lambda=lambda)))
  title <- paste0(
    "sum of ", as.character(n), " IID Pois(", as.character(lambda), ")s")
  hist(samples, main=title)
}
poissonSnHistogram(n=10, lambda=0.1)
poissonSnHistogram(n=100, lambda=0.1)
poissonSnHistogram(n=1000, lambda=0.1)
poissonSnHistogram(n=10000, lambda=0.1)

# Others you could try: uniform, geometric, negative binomial, hypergeometric.


### CLT FOR CONTINUOUS RANDOM VARIABLES ###

# Suppose X1, X2, X3, ... ~ Expo(lambda), independently.
exponentialSnHistogram <- function(n, lambda) {
  # These first three lines are particular to exponential.
  mu <- 1 / lambda
  sigmaSquared <- 1 / lambda^2
  samples <- replicate(10000, sum(rexp(n=n, rate=lambda)))
  title <- paste0(
    "sum of ", as.character(n), " IID Expo(", as.character(lambda), ")s")
  hist(samples, main=title)
}
exponentialSnHistogram(n=10, lambda=0.1)
exponentialSnHistogram(n=100, lambda=0.1)
exponentialSnHistogram(n=1000, lambda=0.1)
exponentialSnHistogram(n=10000, lambda=0.1)

# Suppose X1, X2, X3, ... ~ Unif(a, b), independently.
uniformSnHistogram <- function(n, a, b) {
  mu <- (a + b) / 2
  sigmaSquared <- (b - a)^2 / 12
  samples <- replicate(10000, sum(runif(n=n, min=a, max=b)))
  title <- paste0(
    "sum of ", as.character(n), " IID Unif(", as.character(a), ", ", 
    as.character(b), ")s")
  hist(samples, main=title)
}
uniformSnHistogram(n=10, a=0, b=10)
uniformSnHistogram(n=100, a=0, b=10)
uniformSnHistogram(n=1000, a=0, b=10)
uniformSnHistogram(n=10000, a=0, b=10)

# Suppose X1, X2, X3, ... ~ Norm(mu, sigma^2), independently.
normalSnHistogram <- function(n, mu, sigmaSquared) {
  samples <- replicate(10000, sum(rnorm(n=n, mean=mu, sd=sqrt(sigmaSquared))))
  title <- paste0(
    "sum of ", as.character(n), " IID Norm(", as.character(mu), ", ", 
    as.character(sigmaSquared), ")s")
  hist(samples, main=title)
}
normalSnHistogram(n=10, mu=0, sigmaSquared=10)
normalSnHistogram(n=100, mu=0, sigmaSquared=10)
normalSnHistogram(n=1000, mu=0, sigmaSquared=10)
normalSnHistogram(n=10000, mu=0, sigmaSquared=10)


### CHALLENGES TO THE CLT ###

# Here is a synthetic data set. It's really skewed (asymmetric about its mean). 
# It's so skewed that even its logarithm is skewed.
data <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 100, 1000)
hist(data)
hist(log(data))

# To explore how skewness affects the CLT, we're going to use this data set as 
# a distribution (in the same spirit as bootstrapping).

# Suppose X1, X2, X3, ... ~ Empirical(p), independently, where Empirical is the 
# distribution arising from the data set above. Statistical theory says that 
# our best estimates for E(Xi) and V(Xi) are the sample mean and variance, 
# respectively.
dataSnHistogram <- function(n) {
  mu <- mean(data)
  sigmaSquared <- sd(data)^2
  samples <- replicate(10000, sum(sample(data, n, replace=TRUE)))
  hist(samples)
}
dataSnHistogram(n=10)
dataSnHistogram(n=100)
dataSnHistogram(n=1000)
dataSnHistogram(n=10000)

# Even for a data set that skewed, the CLT works pretty well. So let's make an 
# even-more-skewed data set. The n at which the CLT works well seems a bit 
# larger than in the previous example.
data <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10000, 100000)
hist(data)
hist(log(data))
dataSnHistogram <- function(n) {
  mu <- mean(data)
  sigmaSquared <- sd(data)^2
  samples <- replicate(10000, sum(sample(data, n, replace=TRUE)))
  hist(samples)
}
dataSnHistogram(n=10)
dataSnHistogram(n=100)
dataSnHistogram(n=1000)
dataSnHistogram(n=10000)

# And how does the CLT handle bi-modal (two-peaked) data?
data <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009)
hist(data)
dataSnHistogram <- function(n) {
  mu <- mean(data)
  sigmaSquared <- sd(data)^2
  samples <- replicate(10000, sum(sample(data, n, replace=TRUE)))
  hist(samples)
}
dataSnHistogram(n=10)
dataSnHistogram(n=100)
dataSnHistogram(n=1000)
dataSnHistogram(n=10000)


### HARDER CHALLENGES TO THE CLT ###

# Suppose X1, X2, X3, ... ~ Cauchy(0, 1), independently. The Cauchy 
# distribution does not have a mean or variance, so the CLT does not apply, but 
# let's see how it does anyway.
cauchySnHistogram <- function(n) {
  samples <- replicate(10000, sum(rcauchy(n)))
  title <- paste0(
    "sum of ", as.character(n), " IID Cauchy(0, 1)s")
  print(range(samples))
  hist(samples, main=title)
}
cauchySnHistogram(n=10)
cauchySnHistogram(n=100)
cauchySnHistogram(n=1000)
cauchySnHistogram(n=10000)

# The Pareto distribution has PDF f(x) = 2 x^-3 on [1, infinity). A Pareto X 
# has E(X) = 2 but infinite variance. So the CLT does not apply, but let's see.
rUnfair <- function(n) {
  if (n == 1) {
    u <- runif(1, min=0, max=1)
    sqrt(1 / (1 - u))
  } else
    replicate(n, rUnfair(1))
}
unfairSnHistogram <- function(n) {
  mu <- 2
  samples <- replicate(10000, sum(rUnfair(n=n)))
  print(range(samples))
  hist(samples)
}
unfairSnHistogram(n=10)
unfairSnHistogram(n=100)
unfairSnHistogram(n=1000) # Takes a minute or two.
unfairSnHistogram(n=10000) # Takes tens of minutes?