data <- read.csv("I:\\classes\\regression 2009\\data\\senicfull.csv") par(mfrow=c(3,3)) attach(data) boxplot(LOS) boxplot(AGE) boxplot(INFRISK) boxplot(CULT) boxplot(XRAY) boxplot(CENSUS) boxplot(NURSE) boxplot(FACS) boxplot(BEDS) logLOS <- log(LOS) invLOS <- -1/LOS logCULT <- log(CULT) sqrtCULT <- sqrt(CULT) sqrtINF <- sqrt(INFRISK) pairs(data.frame(logLOS, sqrtINF, sqrtCULT)) pairs(data.frame(LOS, AGE, XRAY, CENSUS, NURSE, FACS, BEDS)) pairs(data.frame(INFRISK, AGE, XRAY, CENSUS, NURSE, FACS, BEDS)) pairs(data.frame(CULT, AGE, XRAY, CENSUS, NURSE, FACS, BEDS)) round(cor(data[,-1]),2) reg1 <- lm(logLOS ~ INFRISK) reg2 <- lm(logLOS ~ CULT) reg3 <- lm(logLOS ~ sqrtCULT) reg4 <- lm(logLOS ~ AGE) reg5 <- lm(logLOS ~ XRAY) #################################### inf2 <- INFRISK^2 mlr1 <- lm(logLOS ~ INFRISK + inf2+ sqrtCULT + AGE + XRAY + CENSUS) summary(mlr1) mlr2 <- lm(logLOS ~ INFRISK + inf2+ AGE + XRAY + CENSUS) summary(mlr2) plot(INFRISK, logLOS) mlr3 <- lm(logLOS ~ inf2+ AGE + XRAY + CENSUS) summary(mlr3) mlr4 <- lm(logLOS ~ INFRISK + AGE + XRAY + CENSUS) summary(mlr4) par(mfrow=c(2,2)) res2 <- mlr2$residuals fit2 <- mlr2$fitted.values res3 <- mlr3$residuals fit3 <- mlr3$fitted.values res4 <- mlr4$residuals fit4 <- mlr4$fitted.values plot(INFRISK, (res2)) abline(h=0) plot(INFRISK, (res3)) abline(h=0) plot(INFRISK, (res4)) abline(h=0) r1 <- lm(logLOS ~ AGE + XRAY + CENSUS) r2 <- lm(inf2 ~ AGE + XRAY + CENSUS) par(mfrow=c(1,1)) plot(r2$residuals, r1$residuals) abline(lm(r1$residuals~r2$residuals)) (1:113)[r2$residuals>30] r1 <- lm(logLOS[-54] ~ AGE[-54] + XRAY[-54] + CENSUS[-54]) r2 <- lm(inf2[-54] ~ AGE[-54] + XRAY[-54] + CENSUS[-54]) abline(lm(r1$residuals~r2$residuals), col=2) mlr5 <- lm(logLOS ~ inf2+ AGE + XRAY + CENSUS +factor(REGION) + factor(MEDSCHL)) summary(mlr5) mlr5 <- lm(logLOS ~ inf2+ AGE + XRAY + CENSUS +factor(REGION) + factor(MEDSCHL)) summary(mlr5) mlr6 <- lm(logLOS ~ inf2+ AGE + XRAY + CENSUS +factor(REGION) ) summary(mlr6) mlr7 <- lm(logLOS ~ inf2+ AGE + CENSUS + factor(REGION) ) summary(mlr7) # inference with logLOS and inf2 mean(AGE) mean(CENSUS) mean(XRAY) beta <- mlr6$coefficients x1 <- c(1, 2^2, 53, 82, 191, 0, 0, 0) x2 <- c(1, 3^2, 53, 82, 191, 0, 0, 0) exp(sum(x1*beta)) exp(sum(x2*beta)) xx <- seq(1.5,7.0,0.1) diflos <- rep(NA, length(xx)) for(i in 1:length(xx)) { y1 <- sum(beta*c(1, xx[i]^2, 53, 82, 191, 0, 0, 0)) y2 <- sum(beta*c(1, (xx[i]+1)^2, 53, 82, 191, 0, 0, 0)) diflos[i] <- exp(y2)-exp(y1) } plot(xx, diflos, type="l", lwd=2) #################### # LECTURE 13 av1 <- lm(logLOS ~ AGE + XRAY + CENSUS + factor(REGION) ) av2 <- lm(INFRISK ~ AGE + XRAY + CENSUS + factor(REGION) ) resy <- av1$residuals resx <- av2$residuals plot(resx, resy, pch=16) abline(lm(resy~resx), lwd=2) ##################### par(mfrow=c(1,1)) mlr8 <- lm(logLOS ~ AGE + XRAY + CENSUS + factor(REGION)) smoother <- lowess(INFRISK, mlr8$residuals) plot(INFRISK, mlr8$residuals) lines(smoother) infrisk.star <- ifelse(INFRISK>4,INFRISK-4,0) mlr9 <- lm(logLOS ~ INFRISK + infrisk.star + AGE + XRAY + CENSUS + factor(REGION)) summary(mlr9) par(mfrow=c(1,2)) plot(mlr9$fitted.values, mlr9$residuals, pch=16) abline(h=0) plot(mlr7$fitted.values, mlr7$residuals, pch=16) abline(h=0) par(mfrow=c(1,2)) plot(INFRISK, mlr9$residuals, pch=16) abline(h=0) plot(INFRISK, mlr7$residuals, pch=16) abline(h=0) ######################################### anova(mlr8) mlr9 <- lm(logLOS ~ AGE + XRAY + CENSUS + factor(REGION) + INFRISK + infrisk.star) mlr7 <- lm(logLOS ~ AGE + XRAY + CENSUS + factor(REGION) + inf2) anova(mlr9) anova(mlr7) ################ x <- rchisq(100,5) y <- 2*x + rnorm(100,0,5) plot(x,y) x[101:102] <-c(5, 15) y[101:102] <- c(45,5) plot(x,y, pch=16) reg <- lm(y~x) plot(reg) hat <- hatvalues(reg) plot(1:102, hat) highhat <- ifelse(hat>0.10,1,0) plot(x,y) points(x[highhat==1], y[highhat==1], col=2, pch=16, cex=1.5) hat9 <- hatvalues(mlr9) plot(1:length(hat9), hat9) highhat <- ifelse(hat9>0.15,1,0) plot(mlr9$fitted.values, mlr9$residuals) points(mlr9$fitted.values[highhat==1], mlr9$residuals[highhat==1], col=2, pch=16, cex=1.5) #################################### ei <- reg$residuals ti <- rstudent(reg) plot(ti,ei) plot(hat, ti) plot(hat, ei) plot(reg$fitted.values, ti) abline(h=c(-2,2)) ###################################### ei <- mlr9$residuals ti <- rstudent(mlr9) plot(ti,ei) plot(hat9, ti) plot(hat9, ei) plot(mlr9$fitted.values, ti) abline(h=c(-2,2))