%macro VLDS(libname=WORK, data=, method=srs, weight=, 
            subjid=, time=, depvar=, indvar=, 
            vartype=, odspath=, odsfile=
           ) / des='SAS(r) System Macro for Analysis of Very Large Data Sets';
   %* This SAS(r) System Macro fits linear mixed models (Gaussian response) and
      generalized linear mixed models (binomial response) with PROC GLIMMIX. It
      was written to assist users in analyzing very large data sets. To decrease
      run time the macro requires the ID and TIME variables to be numeric and
      checks this before proceeding. If they are not numeric, create numeric
      versions first, because character keys substantially degrade performance.

      Input:         libname = SAS libref holding the input data set (default is WORK),
                     data    = input SAS(r) data set name (one-level name, required),
                     method  = keyword SRS or STRS (default is SRS) - it only selects
                               the sampling-method footnote printed in the RTF file,
                     weight  = the weighting variable used when analyzing randomly
                               sampled data - it should reflect the sampling scheme,
                               either simple (SRS) or stratified (STRS) random sampling.
                               When blank no WEIGHT statement is generated,
                     subjid  = subject identification variable (required, must be numeric),
                     time    = time variable (required, must be numeric),
                     depvar  = dependent variable (required),
                     indvar  = blank-separated list of independent variables,
                     vartype = type of the dependent variable - keyword is BINARY
                               (binomial distribution, logit link). Any other value,
                               including blank, is treated as continuous (Gaussian
                               distribution, identity link),
                     odspath = the path where the user would like the output written, 
                     odsfile = the file name (without extension) of the RTF file to be
                               written with the results of the analysis.

      Output:        Two types of output are generated - SAS(r) data sets and hard copy 
                     output in the form of an RTF file. The SAS(r) data set names are:
                     EST (parameter estimates with their confidence limits, as requested
                     by the CL option), FIT (fit statistics), and PVALUES (Type III tests).
                     A work data set named _SORTED_ is also created and left behind.

      Example Usage: %vlds(libname=PROJ1, data=test, subjid=id, time=time, depvar=a1cge8, 
                           indvar=nhb hispanic other male mstat svcpct urban, 
                           vartype=binary, 
                           odspath=\\v07.med.va.gov\cha\TREP\___new organization\workgroups, 
                           odsfile=test)<semi-colon>

                     %vlds(libname=PROJ1, data=test, subjid=id, time=time, depvar=a1cge8, 
                           indvar=nhb hispanic other male mstat svcpct urban, 
                           vartype=continuous, 
                           odspath=\\v07.med.va.gov\cha\TREP\___new organization\workgroups, 
                           odsfile=test)<semi-colon>


      For comments or suggestions, please send comments to: g.eastham.gilbert@gmail.com

      Permission is granted for educational and research use of this SAS(r) System Macro.
      Copyright (r) 2011 Gregory E. Gilbert and Mulugeta Gebregziabher
   ;

   %* Keep every working macro variable local so that same-named variables in
      the calling environment are never overwritten
   ;
   %local lib dsn id t dv iv col_type;

   %* Fail early, with a clear message, when a required parameter is missing;
   %if %length(&data) eq 0 or %length(&subjid) eq 0 or
       %length(&time) eq 0 or %length(&depvar) eq 0 %then %do;
       %put ERROR:  DATA=, SUBJID=, TIME= and DEPVAR= are required.;
       %goto exit;
   %end;

   %* Define the SAS(r) library name and upcase it because the LIBNAME column
      of DICTIONARY.COLUMNS is stored in upper case
   ;
   %let lib =%upcase(&libname);

   %* Define the SAS(r) data set name and upcase it because the MEMNAME column
      of DICTIONARY.COLUMNS is stored in upper case
   ;
   %let dsn =%upcase(&data);

   %* Test to make sure the ID variable exists and is numeric. COL_TYPE is
      cleared first so that a query returning no rows is detected;
   %let col_type=;
   proc sql noprint;
      select type into :col_type
      from dictionary.columns
      where libname     ="&lib"
      and   memname     ="&dsn"
      and   upcase(name)="%upcase(&subjid)";
   quit;

   %* Exit the SAS(r) System Macro if the ID variable is missing or not numeric.;
   %if %length(&col_type) eq 0 %then %do;
       %put ERROR:  ID Variable &subjid was not found in &lib..&dsn..;
       %goto exit;
   %end;
   %if &col_type ne num %then %do;
       %put ERROR:  ID Variable **MUST BE** numeric.;
       %goto exit;
   %end;

   %* Test to make sure the TIME variable exists and is numeric. COL_TYPE is
      cleared again so the result of the ID check cannot leak into this check;
   %let col_type=;
   proc sql noprint;
      select type into :col_type
      from dictionary.columns
      where libname     ="&lib"
      and   memname     ="&dsn"
      and   upcase(name)="%upcase(&time)";
   quit;

   %* Exit the SAS(r) System Macro if the TIME variable is missing or not numeric.;
   %if %length(&col_type) eq 0 %then %do;
       %put ERROR:  TIME Variable &time was not found in &lib..&dsn..;
       %goto exit;
   %end;
   %if &col_type ne num %then %do;
       %put ERROR:  TIME Variable **MUST BE** numeric.;
       %goto exit;
   %end;

   %* Define the subject identification variable and the time variable;
   %let id  =&subjid;
   %let t   =&time;

   %* Define the dependent variable;
   %let dv  =&depvar;

   %* Define the independent variables;
   %let iv  =&indvar;

   %* Sort data by ID and TIME for faster processing. The data set is read from
      the requested library, the same one that was checked above;
   proc sort data=&lib..&dsn out=_sorted_; by &id &t; run;

   %* Create output data sets with parameter estimates, fit statistics and Type III p-values;
   ods output ParameterEstimates=est FitStatistics=fit Tests3=pvalues;

   %* Execute the LMM or GLMM model depending upon the VARiable TYPE specified
      in the SAS Macro call. The %IF block supplies the last MODEL options and
      the semicolon that ends the MODEL statement.
      NOTE(review): the ID variable is entered as a fixed effect as well as the
      SUBJECT= of the random intercept - confirm this is intended;
   proc glimmix data=_sorted_ ic=pq noclprint;
      model   &dv=&id &t &iv / cl chisq covb ddfm=none 
                  %if %upcase(&vartype) eq BINARY %then %do; dist=binomial link=logit; %end;
                  %else %do; dist=gaussian link=identity; %end;
      %if %length(&weight) gt 0 %then %do; weight &weight; %end;
      random  int / subject=&id;
      title1;
   run; quit;

   %* Remove any procedure labels in the body of the ODS output leaving
      user-defined titles in place.;
   ods noptitle;

   %* Print the parameter estimates, model fit statistics, and p-values of the LMM/GLMM;
   ods rtf style=journal file="&odspath\&odsfile..rtf";
      %* The footnote documents the sampling method. No footnote is written when
         METHOD is neither SRS nor STRS;
      %if %upcase(&method) eq SRS %then %do;
         footnote1 justify=left "Sampling Method: Simple Random Sampling (SRS)";
      %end;
      %else %if %upcase(&method) eq STRS %then %do;
         footnote1 justify=left "Sampling Method: Stratified Random Sampling (STRS)";
      %end;
      proc print data=est noobs;     title1 "Parameter Estimates";         run; quit;
      proc print data=fit noobs;     title1 "Model Fit Statistics";        run; quit;
      proc print data=pvalues noobs; title1 "P-Values for Type III Tests"; run; quit;
   title1;
   footnote1;
   ods rtf close;
%exit: %mend  VLDS;


****** REMR***********;
* 1. Run the above GLIMMIX macro (VLDS) for each VISN;
* 2. save the output into sample.ldata_continuous_all_visns ;
* 3. Create COEFFICIENT data set;
* Build CC - every COEFFICIENT row, with WEIGHT picked up from SREMR by VISN,
  plus the combined key IV_VISN (IV and VISN joined by an underscore, blanks
  removed). The composite index on IV_VISN and WEIGHT is what the BY statement
  in the next step relies on, since CC itself is written in VISN order;
data cc(keep  = iv_visn visn estimate weight
        index = (idx = (iv_visn weight)));
   length iv_visn $ 15
          estimate 8;
   merge coefficient(in=in_coeff)
         sremr(keep=visn weight);
   by visn;
   * Drop VISNs that appear only in SREMR;
   if in_coeff;
   iv_visn = compress(cat(iv, "_", visn));
run;

* Keep only the first observation for each IV_VISN and WEIGHT combination;
data remr.continuous_coefficients_all_visns;
   set cc;
   by iv_visn weight;
   if first.weight;
run;

*  Prepare the COVARIANCE data set for input into the LDATA option on the RANDOM statement.
   The input holds stacked blocks of rows with columns COL1-COLk. The output places each
   block on the diagonal of a square layout (COL1-COLn, n being the number of input rows),
   fills everything else with zeros, and adds the ROW and PARM variables.
   NOTE(review): the number of COL variables is taken as the variable count minus 3, so the
   input is presumed to carry exactly three variables other than COL1-COLk. Confirm this
   against the layout of the data set that is passed in;
%macro prepcov(indsn=, outdsn=);
   %* INDSN  = input data set holding the stacked covariance blocks
      OUTDSN = output data set (PARM, ROW, COL1-COLn) in block-diagonal form;

   %* Keep all working macro variables local so callers are not affected;
   %local nobs var group dsid rc i g;

   %* Read the variable count through a data set handle and close the handle
      straight away - an unclosed handle keeps the data set locked;
   %let dsid=%sysfunc(open(&indsn,i));
   %if &dsid le 0 %then %do;
      %put ERROR:  PREPCOV could not open &indsn..;
      %goto exit;
   %end;
   %let var=%eval(%sysfunc(attrn(&dsid,nvars))-3);
   %let rc =%sysfunc(close(&dsid));

   %* A non-positive column count would make the division below fail;
   %if &var le 0 %then %do;
      %put ERROR:  PREPCOV found no COL variables to process in &indsn..;
      %goto exit;
   %end;

   %* Number of rows in the input. The %LET strips the blanks PROC SQL adds;
   proc sql noprint;
      select count(*) into :nobs
      from &indsn;
   quit;
   %let nobs=&nobs;

   %* Number of blocks (integer division);
   %let group=%eval(&nobs/&var);

   data &outdsn;
      length parm row 8.;
      set &indsn(keep=col:);
      row=_n_;
      parm=1;

      %* Create the additional columns and reset them to zero on every row;
      %do i=%eval(&var+1) %to &nobs;
         col&i=0;
      %end;

      %* For the rows of block G, move COL1-COLvar into the columns that belong
         to that block and zero the original columns;
      %do g=2 %to &group;
         if %eval((&g-1)*&var+1) le _n_ le %eval(&g*&var) then do;
            %do i=%eval((&g-1)*&var+1) %to %eval(&g*&var);
               col&i=col%eval(&i-%eval((&g-1)*&var));
               col%eval(&i-%eval((&g-1)*&var))=0;
            %end;
         end;
      %end;
   run;
%exit: %mend  prepcov;

* Reshape the CovB data set into the layout read by the LDATA option below;
%prepcov(indsn=CovB, outdsn=remr.ldata_continuous_all_visns);

* Capture the LS-means and the fit statistics of the model below as data sets.
  The WHERE clause compares the first four characters of DESCR with a
  three-character literal. The shorter value is blank padded for the comparison,
  so AIC and BIC rows are kept while a row starting with AICC is not;
ods output lsmeans=lsmeans2(keep=effect iv visn estimate stderr probt lower upper)  
fitstatistics=fit(where=(substr(descr, 1, 4) eq "AIC" or substr(descr, 1, 4) eq "BIC" ));

* Random effects meta regression of the per-VISN coefficient estimates.
  The covariance parameters are held at the values given on the PARMS
  statement (NOITER), with the coefficient matrix of the LIN(1) structure
  read from the LDATA data set;
proc glimmix data=remr.continuous_coefficients_all_visns order=data noclprint ic=pq;
   class  iv visn   ;
   model     estimate= iv visn /noint df=2119; * df is average number of subjects in each VISN;
   weight    weight;
   random    visn / type=lin(1) ldata=remr.ldata_continuous_all_visns   ; * covariance estimates from the above model;
   parms     (1) (1)   / noiter;
   lsmeans   iv  /cl;
   lsmeans   visn /adjust=smm  diff df=22 alpha=0.00025; *SMM=studentized maximum modulus ;
   title1 "Random Effects Meta Regression with SMM adjusted homogeneity test ";
run;