# ---------------------------------------------------------------- # # generate random versions of the data n <- 20 beta <- c(-2,3) x <- runif(n, 0, 1) x = sort( x ) y.true <- beta[1] + x*beta[2] # try some more y.norm <- y.true + rnorm(n) data = y.norm # add in an outlier use_outlier = T if( use_outlier ){ x[1] = 0.75 data[1] = 6 y.true[1] <- beta[1] + x[1]*beta[2] } # and an influential point use_ip = T if( use_ip ){ x[n] = 2 data[n] = 0 y.true[n] <- beta[1] + x[n]*beta[2] } lmfit <- lm(data ~ x) # make some plots # plot the true and noisy versions of the data par(mfrow=c(1,1)) ylim <- range(c(y.true, data)) plot(x, y.true, main="Data with linear mean", ylim=ylim) points(x, data, col=2, pch=18) legend("topleft", c("truth", "normal"), pch=c(21,18,17), col=1:3) lines(x, predict(lmfit), col="red") plot( x , lmfit$residuals ) points( x , rstandard(lmfit) , pch=18 ) plot( x , cooks.distance( lmfit ) ) # --------------------------------------------------------------------------- # # try a couple of times to look at residuals # try some more to look at residuals d = lmfit$residuals for ( i in 1:100 ){ y.norm <- y.true + rnorm(n) data = y.norm lmfit <- lm(data ~ x) d = rbind( d , lmfit$residuals ) }