*Chapter 8: Gonen- Analyzing ROC using SAS; * Liver surgery example; /* Liver data has records from 554 surgeries that were performed to remove liver tumors. varaiables include: demographics, pre-operative patient characteristics such as comorbidities, operative variables such as blood loss and post operative variables such as incidence and severity of complications following surgery. Predicting the likelihood of complications before surgery enables the treating team of physcians and nurses to increase post operative monitoring of the patient as necessary and for counseling the patient and family */; %include "I:\BMTRY748\mulugeta\sas examples\roc and validation macros.sas"; %include "I:\BMTRY748\mulugeta\sas examples\roc.sas"; libname lec5 "I:\BMTRY748\mulugeta\sas examples\roc"; data liver; set lec5.liver; run; * prediction using all predictors; proc logistic data=liver; model complications=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec; output out=out p=p_1; run; * we can calculate the sensitivity and specificity using a single cutoff value of p_1=0.5; * how well does the model discriminate using p_1=0.5; * you can try more cutoff values to make an ROC curve; data validation; set out; if P_1 > .50 then guessYes = 1; * if predicted value is more than 0.5 positive prediction; else guessYes = 0; run; proc freq data=validation; tables complications*guessyes/nocol nopercent; run; * SAS can provide ROC curve using; ods graphics on; proc logistic data=liver outest=parms1 descending; model complications=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec / ctable pprob = (0 to 1 by 0.1) outroc=roc1 lackfit rsquare; run; proc score data=liver score = parms1 out=scored1 type=parms; var age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec ; run; * prediction with variable selection; proc logistic data=liver outest=parms2 descending; model complications=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec / selection=stepwise ctable pprob = (0 to 1 by 0.1) outroc=roc1;; run; * Calibration uisng Hosmer and Lemeshow Test; proc logistic data=liver outest=parms2 descending; model complications=age_at_op numsegs_resec / ctable pprob = (0 to 1 by 0.1) outroc=roc1 lackfit rsquare; *output out=pred p=phat lower=lcl upper=ucl predprob=(individual crossvalidate); *useful for leave one-out validation; run; * user specified model; ods graphics on; proc logistic data=liver outest=parms1 descending; model complications=age_at_op bilat_resec_code numsegs_resec bilat_resec_code*numsegs_resec/ ctable pprob = (0 to 1 by 0.1) outroc=roc1 lackfit; run; * split sample validation; * split data into training and test parts; data training test; set liver; seed=12061996; unif=ranuni(seed); if unif>0.333 then output training; else output test; run; proc logistic data=training outmodel=model1; model complications = age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec / selection=s; run; * use test data to perform validation using the score statement or using %roc macro belwo; ods graphics on; proc logistic inmodel=model1; score data=test out=testscore outroc=rocvalid fitstat; run; %roc(data=testscore, var=p_1, response=complications, contrast=1, details=no, alpha=.05); %rocplot(dsn=testscore,marker=p1,gold=COMPLICATIONS,anno=4,tlist=.35 .51 .58); /*leave one out validation**/ proc logistic data=liver; model complications=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec / selection=s; output out=outx predprobs=x; ods output classification=classroc; run; %roc(data=outx, var=xp_1, response=complications, contrast=1); *k-fold validation; /*steps: 1. randomly divide your data into k=5 parts, 2. hold aside the first 20% (test data set) and fit logisitc regression using the remaining 80% (training data set), 3. calculate the predicted probability for each observation in the test dataset, 4. repeat this 4 times, 5. now you have predicted probability for each observation that was not based on that observation; */; %xval(dsn=liver,outcome=complications,covars=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec,k=5); %rocplot(_xval_,P_1,COMPLICATIONS,anno=2); %roc(data=_xval_,var=P_1,response=COMPLICATIONS,CONTRAST=1); * bootstrap validation; * you can use the macro bval for doing this; %bval(dsn=liver,outcome=complications,covars=age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec,B=10); * to see how you can do bootsrap using SAS; %macro bootstrap (Nsamples); proc surveyselect data=liver method=urs n=554 rep=&nsamples. out=boot; run; proc logistic data=boot outtest=estimates; model complications = age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec ; freq numberhits; by replicate; run; proc means data=estimates n mean std ; var intercept complications = age_at_op comorb lobeormore_code bilat_resec_code numsegs_resec ; title 'bootstrap results'; run; %mend; %bootstrap(500);