Hello
In my real situation I have 24 data sets (named ABC followed by YYYYMM) and 100 variables.
Here is code that calculates, for each source data set and each numeric variable, the following statistics:
Number of rows
Number of rows with missing value
Number of rows with Positive value
Number of rows with negative value
Number of distinct values (including the null value)
The problem is that I need to run it for 24 months and for 100 variables.
My question-
Can you show an efficient way (fewer lines of code) to run this macro?
Here I run it with 12 separate macro calls, but I guess it can be done with a single call.
Thank you
/* Sample data for month 202401: an ID plus three numeric measures X, Y, Z.
   A lone period in the data lines is read as a numeric missing value. */
data ABC202401;
  input ID X Y Z;
  datalines;
1 10 20 30
2 11 21 31
3 12 31 41
4 10 15 20
5 30 40 50
6 5 10 .
7 . . -100
;
run;
/* Sample data for month 202402: an ID plus three numeric measures X, Y, Z.
   Note that ID 3 appears on two rows in this sample. */
data ABC202402;
  input ID X Y Z;
  datalines;
1 15 30 17
2 18 21 43
3 31 50 41
3 8 18 28
;
run;
/* Sample data for month 202403: an ID plus three numeric measures X, Y, Z.
   A lone period in the data lines is read as a numeric missing value. */
data ABC202403;
  input ID X Y Z;
  datalines;
1 43 23 62
2 21 16 .
3 47 50 .
4 -10 -20 -30
;
run;
/* Sample data for month 202404: an ID plus three numeric measures X, Y, Z.
   A lone period in the data lines is read as a numeric missing value. */
data ABC202404;
  input ID X Y Z;
  datalines;
1 -70 14 62
2 -15 42 45
3 27 . .
;
run;
/*
  RRR_numeric_vars
  Profiles one numeric variable of one monthly data set and appends the
  resulting single row to Summary_All.

  Parameters:
    month - suffix of the input data set; the macro reads ABC&month.
    VAR   - name of the numeric variable to profile.

  Output row (columns of _summary_ / Summary_All):
    var, month            - the two parameters, stored as text
    nr_Rows               - total number of rows
    nr_Rows_POS           - rows where VAR > 0
    nr_Rows_ZERO          - rows where VAR = 0
    nr_Rows_NEG_no_missig - rows where VAR < 0, excluding missing values
                            (column name kept as-is so existing consumers of
                            Summary_All keep working)
    nr_Rows_Missing       - rows where VAR is missing

  Side effects: overwrites the table _summary_ and appends to Summary_All
  (PROC APPEND creates the base table when it does not exist yet).
*/
%macro RRR_numeric_vars(month,VAR);
proc sql;
  create table _summary_ as
  select
    /* Fixed width: without it the column is only as wide as the first
       variable name appended, and PROC APPEND FORCE truncates longer names. */
    "&Var."   as var length=32,
    "&month." as month,
    count(*) as nr_Rows,
    /* Missing values sort below every number, so "> 0" never matches them. */
    sum(case when &Var. > 0 then 1 else 0 end) as nr_Rows_POS,
    sum(case when &Var. = 0 then 1 else 0 end) as nr_Rows_ZERO,
    /* missing() also recognises special missing values (.A-.Z, ._), which a
       comparison against "." would wrongly let through as negative numbers. */
    sum(case when &Var. < 0 and not missing(&Var.) then 1 else 0 end) as nr_Rows_NEG_no_missig,
    sum(case when missing(&Var.) then 1 else 0 end) as nr_Rows_Missing
  from ABC&month.
  ;
quit;

/* FORCE lets the append proceed even if the base table's structure differs. */
proc append data=_summary_ base=Summary_All force;
run;
%mend RRR_numeric_vars;
/*
  RRR_run_all
  Calls %RRR_numeric_vars once for every (variable, month) combination, so a
  whole run is a single macro call instead of one line per combination.

  Parameters:
    months - blank-separated list of data set suffixes (e.g. 202401 202402)
    vars   - blank-separated list of numeric variable names
    reset  - Y deletes an existing Summary_All first, so a re-run does not
             append duplicate rows; N (default) keeps whatever is already there.

  Rows are produced variable by variable, and within each variable month by
  month, in the order the lists are given.
*/
%macro RRR_run_all(months=, vars=, reset=N);
  %local v m this_var this_month;

  /* Nothing to do (and COUNTW would fail) when either list is empty. */
  %if %length(&months.) = 0 or %length(&vars.) = 0 %then %do;
    %put NOTE: RRR_run_all called with an empty months= or vars= list - nothing to run.;
    %return;
  %end;

  %if %upcase(&reset.) = Y %then %do;
    %if %sysfunc(exist(Summary_All)) %then %do;
      proc delete data=Summary_All;
      run;
    %end;
  %end;

  %do v = 1 %to %sysfunc(countw(&vars., %str( )));
    %let this_var = %scan(&vars., &v., %str( ));
    %do m = 1 %to %sysfunc(countw(&months., %str( )));
      %let this_month = %scan(&months., &m., %str( ));
      %RRR_numeric_vars(month=&this_month., VAR=&this_var.);
    %end;
  %end;
%mend RRR_run_all;

%RRR_run_all(months=202401 202402 202403 202404, vars=X Y Z);

proc print data=Summary_All noobs;
run;
... View more