# ---------------------------------------------------------------- #

# generate random versions of the data
n <- 20
beta <- c(-2,3)
x <- runif(n, 0, 1)
x = sort( x )
y.true <- beta[1] + x*beta[2]

# try some more
y.norm <- y.true + rnorm(n)
data = y.norm

# add in an outlier 
use_outlier = T
if( use_outlier ){
x[1] = 0.75
data[1] = 6
y.true[1] <- beta[1] + x[1]*beta[2]
}

# and an influential point
use_ip = T
if( use_ip ){
x[n] = 2
data[n] = 0
y.true[n] <- beta[1] + x[n]*beta[2]
}

lmfit <- lm(data ~ x)

# make some plots
# plot the true and noisy versions of the data
par(mfrow=c(1,1))
ylim <- range(c(y.true, data))
plot(x, y.true, main="Data with linear mean", ylim=ylim)
points(x, data, col=2, pch=18)
legend("topleft", c("truth", "normal"), pch=c(21,18,17), col=1:3)

lines(x, predict(lmfit), col="red")


plot( x , lmfit$residuals )
points( x , rstandard(lmfit) , pch=18 ) 
plot( x ,  cooks.distance( lmfit ) )

# --------------------------------------------------------------------------- #
# try a couple of times to look at residuals

# try some more to look at residuals
d = lmfit$residuals
for ( i in 1:100 ){
y.norm <- y.true + rnorm(n)
data = y.norm
lmfit <- lm(data ~ x)
d = rbind( d , lmfit$residuals )
}