# ---------------------------------------------------------------- #
# LAD -- least absolute deviations (median regression)
# run a quantile regression
install.packages("quantreg")
library(quantreg)

# generate random versions of the data
n <- 20
beta <- c(-2, 3)
x <- runif(n, 0, 1)
y.true <- beta[1] + x * beta[2]
y.norm <- y.true + rnorm(n)
data <- y.norm

# add in an outlier
use_outlier <- TRUE
if (use_outlier) {
  x[1] <- 0.75
  data[1] <- 6
  y.true[1] <- beta[1] + x[1] * beta[2]
}

# and an influential point
use_ip <- TRUE
if (use_ip) {
  x[2] <- 2
  data[2] <- 0
  y.true[2] <- beta[1] + x[2] * beta[2]
}

# least squares vs. LAD (rq defaults to the median, tau = 0.5)
lmfit <- lm(data ~ x)
rqfit <- rq(data ~ x)

# make some plots
# plot the true and noisy versions of the data
par(mfrow = c(1, 1))
ylim <- range(c(y.true, data))
plot(x, y.true, main = "Data with linear mean", ylim = ylim)
points(x, data, col = 2, pch = 18)
legend("topleft", c("truth", "observed"), pch = c(1, 18), col = 1:2)
lines(x, predict(lmfit), col = "red")
lines(x, predict(rqfit), col = "blue")

# ------------------------------------------------------------------ #
# HUBER -- using t noise
## now consider a linear model:
n <- 20
beta <- c(-2, 3)
x <- runif(n, 0, 1)
y.true <- beta[1] + x * beta[2]

# generate random versions of the data
y.norm <- y.true + rnorm(n)
y.t <- y.true + rt(n, df = 1)   # heavy-tailed noise
data <- y.t

# to get robust regression (rlm)
library(MASS)

# run regressions
lmfit <- lm(data ~ x)
rlmfit <- rlm(data ~ x)   # Huber M-estimate by default

# make some plots
# plot the true and noisy versions of the data
par(mfrow = c(1, 1))
ylim <- range(c(y.true, data))
plot(x, y.true, main = "Data with linear mean", ylim = ylim)
points(x, data, col = 2, pch = 18)
legend("topleft", c("truth", "t noise"), pch = c(1, 18), col = 1:2)
lines(x, predict(lmfit), col = "red")
lines(x, predict(rlmfit), col = "green")

## create and plot residuals
resid.fit <- resid(lmfit)
resid.rfit <- resid(rlmfit)
ylim <- range(c(resid.fit, resid.rfit))
plot(x, resid.fit, pch = 18, col = "red", ylim = ylim, main = "Residuals")
points(x, resid.rfit, pch = 17, col = "green")
abline(0, 0, lty = 2)

# compare the fits: lm minimises the sum of squared residuals by construction,
# while rlm downweights the large residuals
sum(abs(resid.fit))
sum(abs(resid.rfit))
sum(resid.fit^2)
sum(resid.rfit^2)

# ------------------------------------------------------------------ #
# RIDGE
library(MASS)
n <- 10
beta <- c(3, 2, -1, 0)
x <- cbind(runif(n), runif(n), runif(n), runif(n))
y.true <- drop(x %*% beta)   # drop() keeps this a vector, not an n x 1 matrix

# collect a few estimates from different data sets
data <- y.true + rnorm(n)
lmfit <- lm(data ~ 0 + x)
ridgefit <- lm.ridge(data ~ x)   # lambda defaults to 0; set e.g. lambda = 1 to see shrinkage
lmcoeff <- coef(lmfit)
ridgecoeff <- ridgefit$coef      # note: these are coefficients for the scaled predictors
for (i in 1:49) {
  data <- y.true + rnorm(n)
  lmfit <- lm(data ~ 0 + x)
  ridgefit <- lm.ridge(data ~ x)
  lmcoeff <- rbind(lmcoeff, coef(lmfit))
  ridgecoeff <- rbind(ridgecoeff, ridgefit$coef)
}
# sampling spread of the estimates of beta[i] (here beta[4] = 0)
i <- 4
boxplot(lmcoeff[, i], ridgecoeff[, i], names = c("lm", "ridge"))

summary(lmfit)
confint(lmfit)

# ------------------------------------------------------------------ #
# LASSO
install.packages("lars")
library(lars)
n <- 10
beta <- c(3, 2, -1, 0)
x <- cbind(runif(n), runif(n), runif(n), runif(n))
y.true <- drop(x %*% beta)

# collect a few estimates from different data sets
data <- y.true + rnorm(n)
lmfit <- lm(data ~ 0 + x)
ridgefit <- lm.ridge(data ~ x)
lassofit <- lars(x, data)
lmcoeff <- coef(lmfit)
ridgecoeff <- ridgefit$coef
lassocoeff <- coef(lassofit, mode = "lambda", s = 1)   # lasso coefficients at penalty lambda = 1
for (i in 1:49) {
  data <- y.true + rnorm(n)
  lmfit <- lm(data ~ 0 + x)
  ridgefit <- lm.ridge(data ~ x)
  lassofit <- lars(x, data)
  lmcoeff <- rbind(lmcoeff, coef(lmfit))
  ridgecoeff <- rbind(ridgecoeff, ridgefit$coef)
  lassocoeff <- rbind(lassocoeff, coef(lassofit, mode = "lambda", s = 1))
}
i <- 4
boxplot(lmcoeff[, i], ridgecoeff[, i], lassocoeff[, i], names = c("lm", "ridge", "lasso"))
plot(lassofit)
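
# ------------------------------------------------------------------ #
# A minimal sketch building on the objects above (lmcoeff, ridgecoeff,
# lassocoeff, beta): since beta[4] = 0, count how often each method estimates
# the fourth coefficient as exactly zero across the simulated data sets.
# Only the lasso's L1 penalty can produce exact zeros; lm and ridge
# essentially never do.
mean(abs(lmcoeff[, 4]) < 1e-8)
mean(abs(ridgecoeff[, 4]) < 1e-8)
mean(abs(lassocoeff[, 4]) < 1e-8)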