%macro VLDS(libname=WORK, data=, method=srs, weight=, subjid=, time=, depvar=,
            indvar=, vartype=, odspath=, odsfile=)
       / des='SAS(r) System Macro for Analysis of Very Large Data Sets';

  /*--------------------------------------------------------------------------
    Fits a linear mixed model (Gaussian, identity link) or a generalized
    linear mixed model (binomial, logit link) with PROC GLIMMIX. It was
    written to assist users in analyzing very large data sets.

    To decrease run time the macro requires the ID and TIME variables to be
    numeric and checks this (via DICTIONARY.COLUMNS) before running any
    analysis. If they are not numeric, numeric versions should be created
    first, because character IDs substantially hurt performance.

    Input:
      libname = SAS libref of the input data set (default WORK)
      data    = input SAS data set name
      method  = keyword SRS or STRS (default SRS); it only selects the
                sampling-method footnote written to the RTF output
      weight  = weighting variable used with randomly sampled data; it should
                reflect the sampling scheme (simple or stratified random
                sampling). When blank no WEIGHT statement is generated
      subjid  = subject identification variable (must be numeric)
      time    = time variable (must be numeric)
      depvar  = dependent variable
      indvar  = blank-separated list of independent variables
      vartype = BINARY for a binary dependent variable; any other value
                (including blank) is treated as continuous
      odspath = directory where the RTF output is written
      odsfile = name of the RTF file (without the .rtf extension)

    Output:
      SAS data sets EST (parameter estimates; the CL option on the MODEL
      statement requests their confidence limits), FIT (fit statistics) and
      PVALUES (Type III tests), plus an RTF file printing the three data sets.

    Example usage:
      %vlds(libname=PROJ1, data=test, subjid=id, time=time, depvar=a1cge8,
            indvar=nhb hispanic other male mstat svcpct urban, vartype=binary,
            odspath=\\v07.med.va.gov\cha\TREP\___new organization\workgroups,
            odsfile=test)
      %vlds(libname=PROJ1, data=test, subjid=id, time=time, depvar=a1cge8,
            indvar=nhb hispanic other male mstat svcpct urban,
            vartype=continuous,
            odspath=\\v07.med.va.gov\cha\TREP\___new organization\workgroups,
            odsfile=test)

    For comments or suggestions, please send comments to:
      g.eastham.gilbert@gmail.com
    Permission is granted for educational and research use of this
    SAS(r) System Macro.
    Copyright (r) 2011 Gregory E. Gilbert and Mulugeta Gebregziabher
  --------------------------------------------------------------------------*/

  /* Keep every working variable local so the caller's symbols are untouched */
  %local lib dsn col_type id t dv iv;

  /* DICTIONARY.COLUMNS stores LIBNAME and MEMNAME in upper case and the
     comparison is case sensitive, so upcase both names */
  %let lib = %upcase(&libname);
  %let dsn = %upcase(&data);

  /* Test that the ID variable is numeric. COL_TYPE is cleared first so that
     a query returning no row cannot leave an earlier value in place */
  %let col_type = ;
  proc sql noprint;
    select type into :col_type
      from dictionary.columns
      where libname = "&lib"
        and memname = "&dsn"
        and upcase(name) = %upcase("&subjid");
  quit;

  %if %length(&col_type) eq 0 %then %do;
    %put ERROR: ID variable &subjid was not found on &lib..&dsn..;
    %goto exit;
  %end;
  %if &col_type ne num %then %do;
    %put ERROR: ID Variable **MUST BE** numeric.;
    %goto exit;
  %end;

  /* Test that the TIME variable is numeric (same reset as above; without it
     a missing TIME variable would inherit "num" from the ID check) */
  %let col_type = ;
  proc sql noprint;
    select type into :col_type
      from dictionary.columns
      where libname = "&lib"
        and memname = "&dsn"
        and upcase(name) = %upcase("&time");
  quit;

  %if %length(&col_type) eq 0 %then %do;
    %put ERROR: TIME variable &time was not found on &lib..&dsn..;
    %goto exit;
  %end;
  %if &col_type ne num %then %do;
    %put ERROR: TIME Variable **MUST BE** numeric.;
    %goto exit;
  %end;

  /* Short aliases for the subject, time, dependent and independent variables */
  %let id = &subjid;
  %let t  = &time;
  %let dv = &depvar;
  %let iv = &indvar;

  /* Sort by ID and TIME for faster processing. The two-level name makes the
     sort read the same data set that was validated above */
  proc sort data=&lib..&dsn out=_sorted_;
    by &id &t;
  run;

  /* Capture parameter estimates, fit statistics and Type III p-values */
  ods output ParameterEstimates=est FitStatistics=fit Tests3=pvalues;

  /* Fit the LMM or GLMM depending on VARTYPE.
     NOTE(review): the subject ID enters the MODEL statement as a fixed
     covariate in addition to being the RANDOM subject - confirm this is
     intended */
  proc glimmix data=_sorted_ ic=pq noclprint;
    model &dv = &id &t &iv / cl chisq covb ddfm=none
      %if %upcase(&vartype) eq BINARY %then %do;
        dist=binomial link=logit
      %end;
      %else %do;
        dist=gaussian link=identity
      %end;
    ;
    %if %length(&weight) gt 0 %then %do;
      weight &weight;
    %end;
    random int / subject=&id;
    title1;
  run;

  /* Remove procedure titles from the body of the ODS output, leaving
     user-defined titles in place */
  ods noptitle;

  /* Print the parameter estimates, fit statistics and p-values to RTF */
  ods rtf style=journal file="&odspath\&odsfile..rtf";

  %if %upcase(&method) eq SRS %then %do;
    footnote1 justify=left "Sampling Method: Simple Random Sampling (SRS)";
  %end;
  %else %if %upcase(&method) eq STRS %then %do;
    footnote1 justify=left "Sampling Method: Stratified Random Sampling (STRS)";
  %end;

  proc print data=est noobs;
    title1 "Parameter Estimates";
  run;

  proc print data=fit noobs;
    title1 "Model Fit Statistics";
  run;

  proc print data=pvalues noobs;
    title1 "P-Values for Type III Tests";
  run;

  title1;
  footnote1;
  ods rtf close;

%exit:
%mend VLDS;

/*----------------------------------------------------------------------------
  REMR
  1. Run the above GLIMMIX macro for each VISN.
  2. Save the output into sample.ldata_continuous_all_visns.
     NOTE(review): the code further down reads
     remr.ldata_continuous_all_visns - confirm which libref is intended.
  3. Create the COEFFICIENT data set.
----------------------------------------------------------------------------*/

/* Attach the VISN weight to every coefficient row and build the IV_VISN key.
   IV is kept because the meta-regression PROC GLIMMIX later in this file
   uses it as a CLASS and MODEL effect. The composite index lets the next
   step use BY processing without a separate sort */
data cc(keep=iv iv_visn visn estimate weight index=(idx=(iv_visn weight)));
  length iv_visn $ 15 estimate 8.;
  merge coefficient(in=coeff)
        sremr(keep=visn weight);
  by visn;
  if coeff eq 1;
  iv_visn = compress(cat(iv, "_", visn));
run;

/* Keep the first row of each IV_VISN / WEIGHT combination */
data remr.continuous_coefficients_all_visns;
  set cc;
  by iv_visn weight;
  if first.weight eq 1;
run;

/* NOTE: the statement-style comment below is deliberately left open here;
   it is terminated by the semicolon at the start of the next source line */
* Prepare the COVARIANCE data set for input into the LDATA option on the RANDOM
  statement. The COV data set must be diagonal.
This assumes there are two EXTRA variables on the data set: ROW and PARM;

/*----------------------------------------------------------------------------
  prepcov: rearranges a stacked set of covariance blocks into one
  block-diagonal data set for the LDATA= option of the RANDOM statement.

  The input is read as consecutive groups of VAR rows, where
  VAR = (number of variables on INDSN) - NEXTRA and each row carries its
  values in COL1-COL<VAR>. For group g (g >= 2) the values are moved to
  COL<(g-1)*VAR+1> - COL<g*VAR> and the original positions are set to 0.
  ROW (= _N_) and PARM (= 1) are added to every row.

  Parameters:
    indsn  = input data set holding the stacked COL: variables
    outdsn = output data set
    nextra = number of variables on INDSN that are not COL: columns
             (default 3, the value previously hard-coded).
             NOTE(review): the comment preceding this macro speaks of two
             extra variables while the code subtracts three - confirm
             against the actual layout of INDSN.
----------------------------------------------------------------------------*/
%macro prepcov(indsn=, outdsn=, nextra=3);
  /* Loop indices and work variables stay local so callers are not clobbered */
  %local dsid rc nobs var group i g lo hi src;

  /* Read the variable count, then release the handle: an unclosed handle
     keeps the data set open for the remainder of the session */
  %let dsid = %sysfunc(open(&indsn, i));
  %if &dsid le 0 %then %do;
    %put ERROR: prepcov could not open &indsn..;
    %goto done;
  %end;
  %let var = %eval(%sysfunc(attrn(&dsid, nvars)) - &nextra);
  %let rc  = %sysfunc(close(&dsid));

  /* Without at least one COL column the group size below would divide by 0 */
  %if &var le 0 %then %do;
    %put ERROR: prepcov found no covariance columns on &indsn (nvars - &nextra = &var).;
    %goto done;
  %end;

  /* Number of rows on the input data set */
  proc sql noprint;
    select count(*) into :nobs from &indsn;
  quit;
  %let nobs  = %sysfunc(compress(&nobs));

  /* Number of stacked blocks (integer division) */
  %let group = %eval(&nobs / &var);

  data &outdsn;
    length parm row 8.;
    set &indsn(keep=col:);
    row  = _n_;
    parm = 1;

    /* Create the columns beyond the first block and default them to zero */
    %do i = %eval(&var + 1) %to &nobs;
      col&i = 0;
    %end;

    /* Shift each later block right onto the diagonal */
    %do g = 2 %to &group;
      %let lo = %eval((&g - 1) * &var + 1);
      %let hi = %eval(&g * &var);
      if &lo le _n_ le &hi then do;
        %do i = &lo %to &hi;
          %let src = %eval(&i - (&g - 1) * &var);
          col&i = col&src;
          col&src = 0;
        %end;
      end;
    %end;
  run;

%done:
%mend prepcov;

%prepcov(indsn=CovB, outdsn=remr.ldata_continuous_all_visns);

/* Capture the LS-means and the AIC / BIC rows of the fit statistics */
ods output
  lsmeans=lsmeans2(keep=effect iv visn estimate stderr probt lower upper)
  fitstatistics=fit(where=(substr(descr, 1, 4) eq "AIC" or substr(descr, 1, 4) eq "BIC" ));

proc glimmix data=remr.continuous_coefficients_all_visns order=data noclprint ic=pq;
  class iv visn;
  /* df is average number of subjects in each VISN */
  model estimate = iv visn / noint df=2119;
  weight weight;
  /* covariance estimates from the above model */
  random visn / type=lin(1) ldata=remr.ldata_continuous_all_visns;
  parms (1) (1) / noiter;
  lsmeans iv / cl;
  /* SMM = studentized maximum modulus */
  lsmeans visn / adjust=smm diff df=22 alpha=0.00025;
  title1 "Random Effects Meta Regression with SMM adjsusted homogneity test ";
run;