# ---------------------------------------------------------------- #
# LAD -- least absolute deviations (median regression)
# run a quantile regression
install.packages("quantreg")
library(quantreg)

# generate random versions of the data
n <- 20
beta <- c(-2, 3)
x <- runif(n, 0, 1)
y.true <- beta[1] + x * beta[2]
y.norm <- y.true + rnorm(n)
data <- y.norm

# add in an outlier
use_outlier <- TRUE
if (use_outlier) {
  x[1] <- 0.75
  data[1] <- 6
  y.true[1] <- beta[1] + x[1] * beta[2]
}

# and an influential point
use_ip <- TRUE
if (use_ip) {
  x[2] <- 2
  data[2] <- 0
  y.true[2] <- beta[1] + x[2] * beta[2]
}

# least squares vs. LAD (rq defaults to the median, tau = 0.5)
lmfit <- lm(data ~ x)
rqfit <- rq(data ~ x)

# make some plots
# plot the true and noisy versions of the data
par(mfrow = c(1, 1))
ylim <- range(c(y.true, data))
plot(x, y.true, main = "Data with linear mean", ylim = ylim)
points(x, data, col = 2, pch = 18)
legend("topleft", c("truth", "observed"), pch = c(1, 18), col = 1:2)
lines(x, predict(lmfit), col = "red")
lines(x, predict(rqfit), col = "blue")

# ------------------------------------------------------------------ #
# HUBER -- using t noise
## now consider a linear model:
n <- 20
beta <- c(-2, 3)
x <- runif(n, 0, 1)
y.true <- beta[1] + x * beta[2]

# generate random versions of the data
y.norm <- y.true + rnorm(n)
y.t <- y.true + rt(n, df = 1)   # heavy-tailed noise
data <- y.t

# to get robust regression (rlm)
library(MASS)

# run regressions
lmfit <- lm(data ~ x)
rlmfit <- rlm(data ~ x)   # Huber M-estimate by default

# make some plots
# plot the true and noisy versions of the data
par(mfrow = c(1, 1))
ylim <- range(c(y.true, data))
plot(x, y.true, main = "Data with linear mean", ylim = ylim)
points(x, data, col = 2, pch = 18)
legend("topleft", c("truth", "t noise"), pch = c(1, 18), col = 1:2)
lines(x, predict(lmfit), col = "red")
lines(x, predict(rlmfit), col = "green")

## create and plot residuals
resid.fit <- resid(lmfit)
resid.rfit <- resid(rlmfit)
ylim <- range(c(resid.fit, resid.rfit))
plot(x, resid.fit, pch = 18, col = "red", ylim = ylim, main = "Residuals")
points(x, resid.rfit, pch = 17, col = "green")
abline(0, 0, lty = 2)

# compare the fits: lm minimises the sum of squared residuals by construction,
# while rlm downweights the large residuals
sum(abs(resid.fit))
sum(abs(resid.rfit))
sum(resid.fit^2)
sum(resid.rfit^2)

# ------------------------------------------------------------------ #
# RIDGE
library(MASS)
n <- 10
beta <- c(3, 2, -1, 0)
x <- cbind(runif(n), runif(n), runif(n), runif(n))
y.true <- drop(x %*% beta)   # drop() keeps this a vector, not an n x 1 matrix

# collect a few estimates from different data sets
data <- y.true + rnorm(n)
lmfit <- lm(data ~ 0 + x)
ridgefit <- lm.ridge(data ~ x)   # lambda defaults to 0; set e.g. lambda = 1 to see shrinkage
lmcoeff <- coef(lmfit)
ridgecoeff <- ridgefit$coef      # note: these are coefficients for the scaled predictors
for (i in 1:49) {
  data <- y.true + rnorm(n)
  lmfit <- lm(data ~ 0 + x)
  ridgefit <- lm.ridge(data ~ x)
  lmcoeff <- rbind(lmcoeff, coef(lmfit))
  ridgecoeff <- rbind(ridgecoeff, ridgefit$coef)
}
# sampling spread of the estimates of beta[i] (here beta[4] = 0)
i <- 4
boxplot(lmcoeff[, i], ridgecoeff[, i], names = c("lm", "ridge"))

summary(lmfit)
confint(lmfit)

# ------------------------------------------------------------------ #
# LASSO
install.packages("lars")
library(lars)
n <- 10
beta <- c(3, 2, -1, 0)
x <- cbind(runif(n), runif(n), runif(n), runif(n))
y.true <- drop(x %*% beta)

# collect a few estimates from different data sets
data <- y.true + rnorm(n)
lmfit <- lm(data ~ 0 + x)
ridgefit <- lm.ridge(data ~ x)
lassofit <- lars(x, data)
lmcoeff <- coef(lmfit)
ridgecoeff <- ridgefit$coef
lassocoeff <- coef(lassofit, mode = "lambda", s = 1)   # lasso coefficients at penalty lambda = 1
for (i in 1:49) {
  data <- y.true + rnorm(n)
  lmfit <- lm(data ~ 0 + x)
  ridgefit <- lm.ridge(data ~ x)
  lassofit <- lars(x, data)
  lmcoeff <- rbind(lmcoeff, coef(lmfit))
  ridgecoeff <- rbind(ridgecoeff, ridgefit$coef)
  lassocoeff <- rbind(lassocoeff, coef(lassofit, mode = "lambda", s = 1))
}
i <- 4
boxplot(lmcoeff[, i], ridgecoeff[, i], lassocoeff[, i], names = c("lm", "ridge", "lasso"))
plot(lassofit)
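
# ------------------------------------------------------------------ #
# A minimal sketch building on the objects above (lmcoeff, ridgecoeff,
# lassocoeff, beta): since beta[4] = 0, count how often each method estimates
# the fourth coefficient as exactly zero across the simulated data sets.
# Only the lasso's L1 penalty can produce exact zeros; lm and ridge
# essentially never do.
mean(abs(lmcoeff[, 4]) < 1e-8)
mean(abs(ridgecoeff[, 4]) < 1e-8)
mean(abs(lassocoeff[, 4]) < 1e-8)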