#####################################################################
###  BMTRY 790: MACHINE LEARNING AND DATA MINING, Summer 2023     ###
###                                                               ###
###  Lecture 4: Penalized Regression, Part II                     ###
###                                                               ###
###  Body Fat Data Analysis                                       ###
###                                                               ###
###  Looking at linear regression, ridge regression, lasso, and   ###
###  least angle regression using the body fat data               ###
#####################################################################

library(MASS)    # lm.ridge(), select()
library(lars)    # lars(), cv.lars()
library(glmnet)  # glmnet(), cv.glmnet()

### Location of the raw data; edit this path to match your machine
bodyfat_path <- "H:/public_html/BMTRY790_Summer2023/Datasets/Body_fat.csv"

bodyfat <- read.csv(bodyfat_path)
### Keep columns 2-15 only (response plus 13 predictors)
bodyfat <- bodyfat[, 2:15]

####################################
###  Scaling and Centering Data  ###
####################################

### For penalized regression, we want to center and scale the variables
### before fitting the model. We can do this using the scale function in R
### (this subtracts the mean and divides by the SD for all variables in a
### matrix).
bodyfat2 <- scale(bodyfat)
bodyfat2 <- as.data.frame(bodyfat2)

### Predictor matrix and response vector shared by the lars and glmnet fits.
### NOTE(review): column 1 is used as the response here while the formula
### fits below use PBF by name -- this assumes PBF is the first column of
### bodyfat2; confirm against the data file.
bodyfat_x <- as.matrix(bodyfat2[, 2:14])
bodyfat_y <- as.vector(bodyfat2[, 1])

#############################################################
### Full Regression Model on scaled data (for comparison) ###
#############################################################

mod13 <- lm(PBF ~ ., data = bodyfat2)
summary(mod13)

########################################################
###  Fitting model using Forward Stagewise approach  ###
########################################################

### type is spelled out in full; lars also accepts the abbreviation "for"
mod_fsw <- lars(x = bodyfat_x, y = bodyfat_y, type = "forward.stagewise")
summary(mod_fsw)

par(mfrow = c(1, 2))
plot(mod_fsw, breaks = FALSE)
plot(mod_fsw, breaks = FALSE, plottype = "Cp")
round(mod_fsw$beta, 3)

#################################
###    Fitting Ridge Model    ###
#################################

### Sequence of lambdas for consideration when fitting ridge model:
### step 0.01 on [0, 9.99], step 0.1 on [10, 99.9], step 1 on [100, 10000]
### (the last segment starts at 100 so the grid has no gap after 99.9)
lam <- c(
  seq(0, 9.99, by = 0.01),
  seq(10, 99.9, by = 0.1),
  seq(100, 10000, by = 1)
)
ridgemod <- lm.ridge(PBF ~ ., data = bodyfat2, lambda = lam)

### Provides the value of lambda that generated the smallest GCV
### (namespaced so it is not masked if dplyr is attached)
MASS::select(ridgemod)

### Trace plot for the ridge models
plot(ridgemod)

### Refit at a single lambda.
### NOTE(review): 1.18 is hard-coded; presumably the GCV-selected value
### reported by select(ridgemod) above -- confirm after rerunning.
ridgemod2 <- lm.ridge(PBF ~ ., data = bodyfat2, lambda = 1.18)

################################
###    LARS/Lasso Example    ###
################################

### Using lars
par(mfrow = c(2, 3))

### Cross-validation assigns folds at random; fix the seed so the CV curve
### is reproducible from run to run
set.seed(790)
cvmod_lass <- cv.lars(x = bodyfat_x, y = bodyfat_y, type = "lasso")

mod_lass <- lars(x = bodyfat_x, y = bodyfat_y, type = "lasso")
plot(mod_lass, breaks = FALSE)

mod_lar <- lars(x = bodyfat_x, y = bodyfat_y, type = "lar")
plot(mod_lar, breaks = FALSE)

mod_stgw <- lars(x = bodyfat_x, y = bodyfat_y, type = "forward.stagewise")
plot(mod_stgw, breaks = FALSE)

mod_stpw <- lars(x = bodyfat_x, y = bodyfat_y, type = "stepwise")
plot(mod_stpw, breaks = FALSE)

### Using glmnet (alpha = 1 requests the lasso penalty)
set.seed(790)
fit.cv <- cv.glmnet(x = bodyfat_x, y = bodyfat_y, alpha = 1)
plot(fit.cv, sign.lambda = -1)

fit2 <- glmnet(x = bodyfat_x, y = bodyfat_y, alpha = 1)
plot(fit2)

### Coefficients at the lambda minimizing CV error and at the 1-SE lambda
coef(fit2, s = fit.cv$lambda.min)
coef(fit2, s = fit.cv$lambda.1se)