# --- general qq comparisons --- #
# Each qqplot() call below draws via plot(), which opens its own frame, so no
# plot.new() is issued beforehand (doing so only emits an extra blank frame).

# two independent samples from the same standard normal
x <- rnorm(5000, 0, 1)
y <- rnorm(5000, 0, 1)
qqplot(x, y)

# shift and scale y (mean 1, sd 2); x is still the standard normal sample
y <- rnorm(5000, 1, 2)
qqplot(x, y)

# choose y from a t-dist with 5 degrees of freedom
y <- rt(5000, df = 5)
qqplot(x, y)

# --- means are gaussian --- #
n <- 2     # points per dataset
N <- 5000  # number of datasets

# create N datasets with n points each (one dataset per column)
data_set <- array(rnorm(n * N, 0, 1), c(n, N))

# compute their means, one per dataset (column)
empirical_mean_set <- colMeans(data_set)

# plot their means against the normal quantiles, good fit
qqnorm(empirical_mean_set)
qqline(empirical_mean_set)

# compute empirical mean and variance of the dataset means
empirical_mean_mean <- mean(empirical_mean_set)
empirical_mean_var <- var(empirical_mean_set)

# plot the dists -- again good match
# red:   kernel density estimate of the dataset means
# black: normal density using the empirical mean and variance
x <- seq(-2, 2, 0.01)
normal_d <- dnorm(x, empirical_mean_mean, sqrt(empirical_mean_var))
mean_density <- density(empirical_mean_set)

# plot() (rather than plot.new() + lines()) draws the axes and the title;
# ylim covers both curves so neither peak is clipped.
plot(
  mean_density,
  main = "Density estimate of data",
  col = "red",
  xlim = c(-2, 2),
  ylim = c(0, max(normal_d, mean_density$y))
)
lines(x, normal_d)

# --- permutation test demo --- #

# Differences of group means under random relabelling of the pooled sample.
#
# x, y   : numeric vectors holding the two observed groups
# n_perm : number of random permutations to draw
#
# Returns a numeric vector of length n_perm. For each permutation the pooled
# values are shuffled, the first length(x) values form one group and the
# remaining length(y) values the other, and the difference of the two group
# means is recorded.
permutation_mean_diffs <- function(x, y, n_perm = 500) {
  pooled <- c(x, y)
  # R indices start at 1: the groups are 1..length(x) and the rest, so every
  # pooled value lands in exactly one group.
  first_group <- seq_along(x)
  second_group <- length(x) + seq_along(y)
  vapply(
    seq_len(n_perm),
    function(i) {
      # index via sample.int() so a length-one pool is not treated as 1:pool
      shuffled <- pooled[sample.int(length(pooled))]
      mean(shuffled[first_group]) - mean(shuffled[second_group])
    },
    numeric(1)
  )
}

# two samples of 50 with the same sd (2) and means 0 and 1
x <- rnorm(50, 0, 2)
y <- rnorm(50, 1, 2)
t.test(x, y)

# actually compute the difference of means
true_diff <- mean(x) - mean(y)

# compute the difference of means under permutations
z <- c(x, y)  # pooled sample that the permutations relabel
sample_diff <- permutation_mean_diffs(x, y, n_perm = 500)

# density of the permuted differences, observed difference marked in red
plot(density(sample_diff))
points(true_diff, 0, col = "red")

# empirical CDF of the permuted differences, observed difference in red
plot(ecdf(sample_diff))
points(true_diff, 0, col = "red")