# Demonstration script (R):
#   1. two-sample QQ plots comparing a standard normal sample against
#      samples from other distributions,
#   2. the sampling distribution of the mean of n normal draws,
#   3. a permutation test for a difference of means.
# All random draws are unseeded, so the figures differ from run to run.

# --- general qq comparisons --- #

x = rnorm( 5000 , 0 , 1 )

# same distribution: points should fall on the line y = x
# (qqplot() is a high-level plot and opens its own frame, so no plot.new()
#  is needed before it -- calling it only produces a blank frame)
y = rnorm( 5000 , 0 , 1 )
qqplot( x , y )

# shift and scale y: still a straight line, but with different
# intercept and slope
y = rnorm( 5000 , 1 , 2 )
qqplot( x , y )

# choose y from a t-dist: heavier tails bend the ends of the plot
y = rt( 5000 , df = 5 )
qqplot( x , y )

# --- means are gaussian --- #

n = 2      # points per dataset
N = 5000   # number of datasets

# create N datasets with n points each (one dataset per column)
data_set = array( rnorm( n * N , 0 , 1 ) , c( n , N ) )

# compute their means (apply over columns)
empirical_mean_set = apply( data_set , 2 , mean )

# plot their means against the normal quantiles, good fit
qqnorm( empirical_mean_set )
qqline( empirical_mean_set )

# compute empirical mean and variance of the means
empirical_mean_mean = mean( empirical_mean_set )
empirical_mean_var = var( empirical_mean_set )

# plot the dists -- again good match
x = seq( -2 , 2 , 0.01 )
normal_d = dnorm( x , empirical_mean_mean , sqrt( empirical_mean_var ) )
mean_density = density( empirical_mean_set )

# low-level plotting: set up an empty frame and coordinate system by hand.
# ylim covers both curves so that neither one is clipped.
plot.new()
plot.window( xlim = c( -2 , 2 ) ,
             ylim = c( 0 , max( normal_d , mean_density$y ) ) )
# 'main' is not a graphical parameter accepted by lines(); set the title
# with title() instead
title( main = "Density estimate of data" )
lines( mean_density , col = 'red' )   # kernel density estimate of the means
lines( x , normal_d )                 # fitted normal density

# --- permutation test demo --- #

x = rnorm( 50 , 0 , 2 )
y = rnorm( 50 , 1 , 2 )
t.test( x , y )

# actually compute the difference of means
true_diff = mean( x ) - mean( y )

# compute the difference of means under permutations
z = c( x , y )          # pooled sample
n_x = length( x )
n_pool = length( z )
n_perm = 500
sample_diff = numeric( n_perm )   # one permuted statistic per iteration

for ( i in 1:n_perm ){
  # reshuffling the previous shuffle is still a uniform random permutation
  z = sample( z )
  # R vectors are 1-indexed: the first n_x values play the role of x and
  # the remaining values the role of y, so every pooled point is used
  sample_diff[ i ] = mean( z[ 1:n_x ] ) - mean( z[ ( n_x + 1 ):n_pool ] )
}

# permutation distribution of the statistic, observed value marked in red
plot( density( sample_diff ) )
points( true_diff , 0 , col='red' )

plot( ecdf( sample_diff ) )
points( true_diff , 0 , col='red' )