#########################################################################################################
### BMTRY 790: Machine Learning                                                                       ###
### Spring 2023                                                                                       ###
###                                                                                                   ###
### R code for CART and Logic Regression tree-based methods.                                          ###
#########################################################################################################

#####################################
### SINGLE TREE METHODS FOR THE   ###
### BREAST CANCER DATA SET        ###
#####################################

### CART ###
library(rpart)
library(rpart.plot)
library(caret)

### Regression tree ----
air <- read.csv("H:\\public_html\\BMTRY790_Spring2023\\Datasets\\Air.csv")

# Default CART fit; log-transform stabilizes the skewed Ozone response.
Cair1 <- rpart(log(Ozone) ~ ., data = air)

# Base-graphics rendering, then the nicer rpart.plot rendering of the same tree.
plot(Cair1, compress = TRUE, main = "CART model 1 for Ozone")
text(Cair1, use.n = TRUE)
rpart.plot(Cair1, compress = TRUE, main = "CART model 1 for Ozone")

### Tuning the CART model using "prune" ----
# Cost-complexity pruning of the already-grown tree (result printed, not saved).
prune(Cair1, cp = 0.05)

### Tuning CART from the front end using rpart.control ----
# rpart.control sets control parameters for growing the CART model:
#   minsplit  - min number of observations a node must have for a split to be tried
#   minbucket - min number of observations allowed in a terminal node
#   cp        - complexity parameter: any split that doesn't improve overall
#               lack of fit by a factor of cp is not attempted
Cair2 <- rpart(log(Ozone) ~ ., data = air,
               control = rpart.control(minsplit = 6, minbucket = 2, cp = 0.01))
Cair3 <- rpart(log(Ozone) ~ ., data = air,
               control = rpart.control(minsplit = 10, minbucket = 5, cp = 0.05))

# Show all three fits side by side; restore the previous graphics state afterwards
# so later single-panel plots are not squeezed into a 1x3 layout.
oldpar <- par(mfrow = c(1, 3))
rpart.plot(Cair1, compress = TRUE, main = "CART model 1 for Ozone")
rpart.plot(Cair2, compress = TRUE, main = "CART model 2 for Ozone")
rpart.plot(Cair3, compress = TRUE, main = "CART model 3 for Ozone")
par(oldpar)

### Tuning using the caret package (data must be complete) ----
air2 <- na.omit(air[, c(1:4, 7)])
trair <- train(log(Ozone) ~ ., data = air2, method = "rpart",
               trControl = trainControl(method = "boot", number = 1000))

### Classification tree ----
ln <- read.csv("H:\\public_html\\BMTRY790_Spring2023\\Datasets\\LupusNephritis.csv")

Cmod1 <- rpart(CR90 ~ ., data = ln, method = "class",
               control = rpart.control(minsplit = 6, minbucket = 2, cp = 0.05))
Cmod1
rpart.plot(Cmod1, compress = TRUE, main = "CART model for Treatment Response LN")

# caret-tuned classification tree (cp selected by bootstrap resampling).
trln <- train(as.factor(CR90) ~ ., data = ln, method = "rpart",
              trControl = trainControl(method = "boot", number = 1000))
trln

########################
### Logic Regression ###
########################
library(LogicReg)

BC <- read.csv("H:\\public_html\\BMTRY790_Spring2023\\Datasets\\BC_trees.csv")

# Dichotomize predictors for logic regression (requires binary inputs).
BC$agecat <- ifelse(BC$age < 61, 0, 1)
# NOTE(review): this recodes gradecat from itself; the parallel agecat line derives
# from a raw column (age), so this was possibly meant to be BC$grade < 3 -- confirm
# against the BC_trees.csv schema before relying on it.
BC$gradecat <- ifelse(BC$gradecat < 3, 0, 1)

### Fitting a classification model ----
### Using CV to select number of leaves (select = 3 => cross-validation).
anneal.params <- logreg.anneal.control(start = 2, end = -1, iter = 100000)
logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 1, select = 3,
       ntrees = 1, nleaves = c(2, 8), kfold = 5,
       anneal.control = anneal.params)

# Single-model fits (select = 1) at the chosen size, with two annealing schedules.
anneal.params <- logreg.anneal.control(start = 1, end = -1, iter = 100000, update = 1000)
fit <- logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 1, select = 1,
              ntrees = 1, nleaves = 5, anneal.control = anneal.params)

anneal.params <- logreg.anneal.control(start = 2, end = -1, iter = 100000, update = 1000)
fit <- logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 1, select = 1,
              ntrees = 1, nleaves = 5, anneal.control = anneal.params)
plot(fit)

### Fitting logistic regression tree(s) instead of a classification tree ----
### Using CV to select number of trees and leaves (type = 3 => logistic regression).
anneal.params <- logreg.anneal.control(start = 2, end = -4, iter = 100000)
logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 3, select = 3,
       ntrees = c(1, 2), nleaves = c(3, 8), kfold = 5,
       anneal.control = anneal.params)

anneal.params <- logreg.anneal.control(start = 2, end = -4, iter = 100000, update = 5000)
fit <- logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 3, select = 1,
              ntrees = 1, nleaves = 7, anneal.control = anneal.params)

anneal.params <- logreg.anneal.control(start = 1, end = -3.5, iter = 100000, update = 5000)
fit <- logreg(resp = BC$gradecat, bin = BC[, c(3:8, 10)], type = 3, select = 1,
              ntrees = 1, nleaves = 7, anneal.control = anneal.params)
fit
plot(fit)