options mlogic mprint symbolgen; %let libpath=EDA; %let rdata=rollupfinalEDA; proc contents data=&libpath..&rdata out=cont1(keep=NAME TYPE LENGTH VARNUM); run; proc sort data=cont1; by VARNUM; run; ods output nlevels=checkfreq; proc freq data=&libpath..&rdata nlevels; tables _all_ /noprint; run; ods output close; data checkfreq; set checkfreq; varcnt+1;run; data &libpath..contentfile; merge cont1 (rename=(VARNUM=varcnt) rename=(Name=TableVar)) checkfreq; by varcnt; run; proc print data=&libpath..contentfile; run; proc sql noprint; select count(*) into: NOBS from &libpath..contentfile ; quit; %macro stm(); %let dsId=%Sysfunc(open(&libpath..contentfile)); %let cvar=%Sysfunc(varnum(&dsId.,TableVar)); %let I=1; create table stat_missing as select %do %while (%Sysfunc(fetch(&dsId.)) = 0); %let var = %Sysfunc(getvarc(&dsId.,&cvar)); %if &I ne &NOBS %then coalesce(sum(missing(&var.)),0) as &var ,; %else coalesce(sum(missing(&var.)),0) as &var; %let I= %eval(&I + 1); %end; from &libpath..&rdata; %let RC=%sysfunc(close(&DsId)); %mend; %macro stc(); %let dsId=%Sysfunc(open(&libpath..contentfile)); %let cvar=%Sysfunc(varnum(&dsId.,TableVar)); %let I=1; create table stat_nonmissing as select %do %while (%Sysfunc(fetch(&dsId.))=0); %let var=%Sysfunc(getvarc(&dsId.,&cvar)); %if &I ne &NOBS &then count(&var.) as &var,; %else count(&var.) as &var; %let I=%eval(&I+1); %end; from &libpath..&rdata; %let RC=%sysfunc(close(&DsId)); %mend; proc sql; %stm(); %stc(); quit(); proc transpose data=stat_missing out=stat_trans_missing Name=Tablevar prefix=Missing; proc transpose data=stat_nonmissing out=stat_trans_nonmissing Name=Tablevar prefix=Non_Missing; proc sort data=&libpath..contentfile; by Tablevar; proc sort data=stat_trans_missing; by Tablevar; proc sort data=stat_trans_nonmissing; by Tablevar; data &libpath..final_content; merge &libpath..contentfile stat_trans_missing stat_trans_nonmissing; by tablevar; proc sort data=&libpath..final_content; by varcnt; run; proc sql noprint; select Tablevar into : varmore separated by '' from &libpath..contentfile where TYPE=1; select count(TableVar) into :vmcnt from &libpath..contentfile where TYPE=1; quit; %let vmcnt=&vmcnt; proc sql noprint; select TableVar into :v1-:v&vmcnt from &libpath..contentfile where TYPE=1; select count(varcnt) into :varcount from &libpath..contentfile where TYPE=2; %let varcount=&varcount; select tablevar into : x1-:x&varcount from &libpath..contentfile where TYPE=2; select count(*) into :obscnt from &libpath..&rdata.; quit; %macro stkorig; %do i=1 %to &vmcnt; data v&i; length NAME $32.; set &libpath..&rdata(keep=&&v&i rename=(&&v&i=origvalue)); NAME="&&v&i"; format NAME $32.; attrib _all_ label=''; run; proc univariate data=v&i noprint; var origvalue; output out=vstats&i min=Minimum max=Maximum mean=Mean std=Standard_Deviation skewness=Skewness kurtosis=Kurtosis pctlpts=0.01 0.05 0.1 0.5 1 5 10 25 50 75 90 95 99 99.5 99.9 99.95 99.99 pctlpre=pct_; run; data vstats&i; set vstats&i; length TableVar $32.; TableVar="&&v&i"; run; %end; %mend; %stkorig; data stackorig; set vstats1-vstats&vmcnt; run; proc sort data=&libpath..final_content; by Tablevar; run; proc sort data=stackorig;by Tablevar; run; data &libpath..stackfinal; merge &libpath..final_content stackorig; by Tablevar;run; %macro outshell; %do i=1 %to &varcount; proc sql noprint; create table eda&i as (select "&&x&i" as tablevar length=64 format=$64., &&x&i as tier length =64 format=$64., count(*) as Count from &libpath..&rdata group by "&&x&i", &&x&i ) order by &&x&i; quit; %end; %mend outshell; %outshell; data &libpath..edaall; set eda1-eda&varcount; run; proc sort data=&libpath..stackfinal; by Tablevar; run; proc sort data=&libpath..edaall; by Tablevar; run; data &libpath..edaall_final; merge &libpath..stackfinal &libpath..edaall; by Tablevar; run; proc sort data=&libpath..edaall_final; by TYPE; run; data &libpath..edaall_final; set &libpath..edaall_final; Total_Records=Missing1+Non_Missing1; Missing_Percentages=Missing1/Total_Records; Non_Missing_Percentages=Non_Missing1/Total_Records; if count=. then count=Non_Missing1; run; data &libpath..edaall_final (drop=length varcnt); Attrib tablevar Label=""; Attrib tier Label="tier "; Attrib Count Label="Count"; Attrib TYPE Label="TYPE "; Attrib NLevels Label="NLevels "; Attrib Missing1 Label="Missing1 "; Attrib Non_Missing1 Label="Non_Missing1 "; Attrib Total_Records Label="Total_Records "; Attrib Missing_Percentages Label="Missing_Percentages "; Attrib Non_Missing_Percentages Label="Non_Missing_Percentages"; Attrib Minimum Label="Minimum"; Attrib Maximum Label="Maximum"; Attrib Mean Label="Mean "; Attrib Standard_Deviation Label="Standard_Deviation"; Attrib Skewness Label="Skewness "; Attrib Kurtosis Label="Kurtosis "; Attrib pct_0_01 Label="Percentile at 0.01%"; Attrib pct_0_05 Label="Percentile at 0.05%"; Attrib pct_0_1 Label="Percentile at 0.1%"; Attrib pct_0_5 Label="Percentile at 0.5%"; Attrib pct_1 Label="Percentile at 1%"; Attrib pct_5 Label="Percentile at 5%"; Attrib pct_10 Label="Percentile at 10%"; Attrib pct_25 Label="Percentile at 25%"; Attrib pct_50 Label="Percentile at 50%"; Attrib pct_75 Label="Percentile at 75%"; Attrib pct_90 Label="Percentile at 90%"; Attrib pct_95 Label="Percentile at 95%"; Attrib pct_99 Label="Percentile at 99%"; Attrib pct_99_5 Label="Percentile at 99.5 %"; Attrib pct_99_9 Label="Percentile at 99.9 %"; Attrib pct_99_95 Label="Percentile at 99.95%"; Attrib pct_99_99 Label="Percentile at 99.99%"; set &libpath..edaall_final; run; proc format; value TypeF 1="Numeric" 2="Categorical"; run; data &libpath..edaall_final; set &libpath..edaall_final; format Type TypeF. format Missing_Percentages Non_Missing_Percentages percentn10.2; run; ods csv file="Univariate_Report.csv"; proc print data=&libpath..edaall_final; format Type TypeF.; format Missing_Percentages Non_Missing_Percentages percentn10.2; run; ods csv close;
... View more