Hello
I have a data set with wide structure and each customer id has one row with multiple columns of RevRatio (Each column represent another month. Month1 is last month, month 2 is one month before last month and so on)
I want to calculate the change in PCT between each month and next month and also calculate indicators.
IS there another way to calculate it instead of the macro or the macro I wrote is the best way?
Data have;
input CustID RevRatio1-RevRatio12;
cards;
1 0.9 0.8 0.8 0.7 0.9 0.9 0.7 0.6 0.9 0.9 0.7 0.6
2 0.6 0.1 0.2 0.3 0.2 0.4 0.5 1.1 0.9 0.7 0.8 0.9
3 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
4 -9 0.2 0.1 0.4 0.5 -9 -8 0.1 0.2 0.3 0.4 0.1
;
Run;
/*********** Way1-write it without macro****************/
/*********** Way1-write it without macro****************/
/*********** Way1-write it without macro****************/
%let Ratio=RevRatio;
Data want;
set have;
IF &Ratio.2 not in (0,-6,-7,-8,-9,.) and &Ratio.1 not in (-6,-7,-8,-9,.) then PCT_change_2_1=(&Ratio.1-&Ratio.2)/&Ratio.2;
IF &Ratio.3 not in (0,-6,-7,-8,-9,.) and &Ratio.2 not in (-6,-7,-8,-9,.) then PCT_change_3_2=(&Ratio.2-&Ratio.3)/&Ratio.3;
IF &Ratio.4 not in (0,-6,-7,-8,-9,.) and &Ratio.3 not in (-6,-7,-8,-9,.) then PCT_change_4_3=(&Ratio.3-&Ratio.4)/&Ratio.4;
IF &Ratio.5 not in (0,-6,-7,-8,-9,.) and &Ratio.4 not in (-6,-7,-8,-9,.) then PCT_change_5_4=(&Ratio.4-&Ratio.5)/&Ratio.5;
IF &Ratio.6 not in (0,-6,-7,-8,-9,.) and &Ratio.5 not in (-6,-7,-8,-9,.) then PCT_change_6_5=(&Ratio.5-&Ratio.6)/&Ratio.6;
IF &Ratio.7 not in (0,-6,-7,-8,-9,.) and &Ratio.6 not in (-6,-7,-8,-9,.) then PCT_change_7_6=(&Ratio.6-&Ratio.7)/&Ratio.7;
IF &Ratio.8 not in (0,-6,-7,-8,-9,.) and &Ratio.7 not in (-6,-7,-8,-9,.) then PCT_change_8_7=(&Ratio.7-&Ratio.8)/&Ratio.8;
IF &Ratio.9 not in (0,-6,-7,-8,-9,.) and &Ratio.8 not in (-6,-7,-8,-9,.) then PCT_change_9_8=(&Ratio.8-&Ratio.9)/&Ratio.9;
IF &Ratio.10 not in (0,-6,-7,-8,-9,.) and &Ratio.9 not in (-6,-7,-8,-9,.) then PCT_change_10_9=(&Ratio.9-&Ratio.10)/&Ratio.10;
IF &Ratio.11 not in (0,-6,-7,-8,-9,.) and &Ratio.10 not in (-6,-7,-8,-9,.) then PCT_change_11_10=(&Ratio.10-&Ratio.11)/&Ratio.11;
IF &Ratio.12 not in (0,-6,-7,-8,-9,.) and &Ratio.11 not in (-6,-7,-8,-9,.) then PCT_change_12_11=(&Ratio.11-&Ratio.12)/&Ratio.12;
IF PCT_change_2_1>0.05 AND &Ratio.1>0.5 then Ind_2_1=1; /****השינוי מחודש קודם גדול מ5% והיחס בחודש יעד גדול מ50%****/
else IF &Ratio.1>1 and &Ratio.2<1 then Ind_2_1=1;/***מעבר ממצב של לא חורג למצב של חורג***/
else IF &Ratio.2 ne -9 and &Ratio.1 = -9 then Ind_2_1=1; /***בחודש יעד מנצל לא מסגרת ובחודש קודם במצב אחר****/
else IF &Ratio.1>0.5 AND &Ratio.2 in (0,-8,-9) then Ind_2_1=1; /***יחס בחודש יעד גדול מ50% ובחודש קודם יחס היה 0 או מינוס 99999,99998***/
else Ind_2_1=0;
IF PCT_change_3_2>0.05 AND &Ratio.2>0.5 then Ind_3_2=1;
else IF &Ratio.2>1 and &Ratio.3<1 then Ind_3_2=1;
else IF &Ratio.3 ne -9 and &Ratio.2 = -9 then Ind_3_2=1;
else IF &Ratio.2>0.5 AND &Ratio.3 in (0,-8,-9) then Ind_3_2=1;
else Ind_3_2=0;
IF PCT_change_4_3>0.05 AND &Ratio.3>0.5 then Ind_4_3=1;
else IF &Ratio.3>1 and &Ratio.4<1 then Ind_4_3=1;
else IF &Ratio.4 ne -9 and &Ratio.3 = -9 then Ind_4_3=1;
else IF &Ratio.3>0.5 AND &Ratio.4 in (0,-8,-9) then Ind_4_3=1;
else Ind_4_3=0;
IF PCT_change_5_4>0.05 AND &Ratio.4>0.5 then Ind_5_4=1;
else IF &Ratio.4>1 and &Ratio.5<1 then Ind_5_4=1;
else IF &Ratio.5 ne -9 and &Ratio.4 = -9 then Ind_5_4=1;
else IF &Ratio.4>0.5 AND &Ratio.5 in (0,-8,-9) then Ind_5_4=1;
else Ind_5_4=0;
IF PCT_change_6_5>0.05 AND &Ratio.5>0.5 then Ind_6_5=1;
else IF &Ratio.5>1 and &Ratio.6<1 then Ind_6_5=1;
else IF &Ratio.6 ne -9 and &Ratio.5 = -9 then Ind_6_5=1;
else IF &Ratio.5>0.5 AND &Ratio.6 in (0,-8,-9) then Ind_6_5=1;
else Ind_6_5=0;
IF PCT_change_7_6>0.05 AND &Ratio.6>0.5 then Ind_7_6=1;
else IF &Ratio.6>1 and &Ratio.7<1 then Ind_7_6=1;
else IF &Ratio.7 ne -9 and &Ratio.6 = -9 then Ind_7_6=1;
else IF &Ratio.6>0.5 AND &Ratio.7 in (0,-8,-9) then Ind_7_6=1;
else Ind_7_6=0;
IF PCT_change_8_7>0.05 AND &Ratio.7>0.5 then Ind_8_7=1;
else IF &Ratio.7>1 and &Ratio.8<1 then Ind_8_7=1;
else IF &Ratio.8 ne -9 and &Ratio.7 = -9 then Ind_8_7=1;
else IF &Ratio.7>0.5 AND &Ratio.8 in (0,-8,-9) then Ind_8_7=1;
else Ind_8_7=0;
IF PCT_change_9_8>0.05 AND &Ratio.8>0.5 then Ind_9_8=1;
else IF &Ratio.8>1 and &Ratio.9<1 then Ind_9_8=1;
else IF &Ratio.9 ne -9 and &Ratio.8 = -9 then Ind_9_8=1;
else IF &Ratio.8>0.5 AND &Ratio.9 in (0,-8,-9) then Ind_9_8=1;
else Ind_9_8=0;
IF PCT_change_10_9>0.05 AND &Ratio.9>0.5 then Ind_10_9=1;
else IF &Ratio.9>1 and &Ratio.10<1 then Ind_10_9=1;
else IF &Ratio.10 ne -9 and &Ratio.9 = -9 then Ind_10_9=1;
else IF &Ratio.9>0.5 AND &Ratio.10 in (0,-8,-9) then Ind_10_9=1;
else Ind_10_9=0;
IF PCT_change_11_10>0.05 AND &Ratio.10>0.5 then Ind_11_10=1;
else IF &Ratio.10>1 and &Ratio.11<1 then Ind_11_10=1;
else IF &Ratio.11 ne -9 and &Ratio.10 = -9 then Ind_11_10=1;
else IF &Ratio.10>0.5 AND &Ratio.11 in (0,-8,-9) then Ind_11_10=1;
else Ind_11_10=0;
IF PCT_change_12_11>0.05 AND &Ratio.11>0.5 then Ind_12_11=1;
else IF &Ratio.11>1 and &Ratio.12<1 then Ind_12_11=1;
else IF &Ratio.12 ne -9 and &Ratio.11 = -9 then Ind_12_11=1;
else IF &Ratio.11>0.5 AND &Ratio.12 in (0,-8,-9) then Ind_12_11=1;
else Ind_12_11=0;
Run;
/***********Way2-Try to write it in macro code****************/
/***********Way2-Try to write it in macro code****************/
/***********Way2-Try to write it in macro code****************/
/***********Way2-Try to write it in macro code****************/
Data want2;
set have;
%macro RRR(Ratio,from,To);
IF &Ratio.2 not in (0,-6,-7,-8,-9,.) and &Ratio.1 not in (-6,-7,-8,-9,.) then PCT_change_&From._&To.=(&Ratio.1-&Ratio.2)/&Ratio.2;
IF PCT_change_&From._&To.>0.05 AND &Ratio.1>0.5 then Ind_&From._&To.=1;
/****השינוי מחודש קודם גדול מ5% והיחס בחודש יעד גדול מ50%****/
else IF &Ratio.&To.>1 and &Ratio.&From.<1 then Ind_&From._&To.=1;
/***מעבר ממצב של לא חורג למצב של חורג***/
else IF &Ratio.&From. ne -9 and &Ratio.&To. = -9 then Ind_&From._&To.=1;
/***בחודש יעד מנצל לא מסגרת ובחודש קודם במצב אחר****/
else IF &Ratio.&To.>0.5 AND &Ratio.&From. in (0,-8,-9) then Ind_&From._&To.=1;
/***יחס בחודש יעד גדול מ50% ובחודש קודם יחס היה 0 או מינוס 99999,99998***/
else Ind_&From._&To.=0;
%mend RRR;
%RRR(Ratio=RevRatio,from=2,To=1)
%RRR(Ratio=RevRatio,from=3,To=2)
%RRR(Ratio=RevRatio,from=4,To=3)
%RRR(Ratio=RevRatio,from=5,To=4)
%RRR(Ratio=RevRatio,from=6,To=5)
%RRR(Ratio=RevRatio,from=7,To=6)
%RRR(Ratio=RevRatio,from=8,To=7)
%RRR(Ratio=RevRatio,from=9,To=8)
%RRR(Ratio=RevRatio,from=10,To=9)
%RRR(Ratio=RevRatio,from=11,To=10)
%RRR(Ratio=RevRatio,from=12,To=11)
Run;
Yes. Use an array (or multiple arrays).
data want;
set have;
array revratio [12] revratio1-revratio12;
do index=1 to dim(revratio)-1;
....
end;
run;
But do we have to guess what you intended? (or deduce it from the code you tried?)
Why are some of the values NEGATIVE? What does that mean? Why do you not calculate the percentage when the values are negative?
I believe you're looking for array processing. Below your logic converted using such syntax.
data have;
input CustID RevRatio1-RevRatio12;
cards;
1 0.9 0.8 0.8 0.7 0.9 0.9 0.7 0.6 0.9 0.9 0.7 0.6
2 0.6 0.1 0.2 0.3 0.2 0.4 0.5 1.1 0.9 0.7 0.8 0.9
3 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
4 -9 0.2 0.1 0.4 0.5 -9 -8 0.1 0.2 0.3 0.4 0.1
;
Run;
data want(drop=i);
set have;
array RevRatio{*} RevRatio1-RevRatio12;
array PCT_change{*} PCT_change_2_1 PCT_change_3_2 PCT_change_4_3 PCT_change_5_4 PCT_change_6_5
PCT_change_7_6 PCT_change_8_7 PCT_change_9_8 PCT_change_10_9 PCT_change_11_10 PCT_change_12_11;
array Ind{*} Ind_2_1 Ind_3_2 Ind_4_3 Ind_5_4 Ind_6_5 Ind_7_6 Ind_8_7 Ind_9_8 Ind_10_9 Ind_11_10 Ind_12_11;
do i=1 to dim(RevRatio)-1;
if RevRatio[i] not in (0,-6,-7,-8,-9,.) and RevRatio[i+1] not in (0,-6,-7,-8,-9,.) then PCT_change[i]=(RevRatio[i]-RevRatio[i+1])/RevRatio[i+1];
if PCT_change[i]>0.05 and RevRatio[i]>0.5 then Ind[i]=1;
else if RevRatio[i+1]<1 and RevRatio[i]>1 then Ind[i]=1;
else if RevRatio[i+1] ne -9 and RevRatio[i] = -9 then Ind[i]=1;
else if RevRatio[i]>0.5 and RevRatio[i+1] in (0,-8,-9) then Ind[i]=1;
else Ind[i]=0;
end;
run;
proc print data=want;
run;
It might be worth to consider a different naming convention (numbering) for variables PCT_change and Ind so you don't need to explicitly list them in array definitions but can reference them the same like RevRatio. Something like:
array PCT_change{12} 8;
And as a more general remark:
Especially in a case where you've got a lot of data a long table structure with monthly tables would likely be much more efficient because you only need to process the data of the last two recent months for your calculations instead of 12 months as it's done currently.
Most SAS Procs work much better with long structures. Data step or SQL views make it easy to combine monthly tables.
You can always create a wide table structure from this monthly long table structures if you need them - and by keeping the monthly tables these wide table structures can include as many months as you need and got monthly tables for.
I think the best choice is using SAS/IML to vectorize this code.
Data have;
input CustID RevRatio1-RevRatio12;
cards;
1 0.9 0.8 0.8 0.7 0.9 0.9 0.7 0.6 0.9 0.9 0.7 0.6
2 0.6 0.1 0.2 0.3 0.2 0.4 0.5 1.1 0.9 0.7 0.8 0.9
3 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
4 -9 0.2 0.1 0.4 0.5 -9 -8 0.1 0.2 0.3 0.4 0.1
;
Run;
proc iml;
use have(keep=RevRatio: ) nobs nobs;
read all var _all_ into r[c=vname];
close;
use have(keep=CustID);
read all var {CustID};
close;
pct_change=j(nobs,ncol(vname)-1,.);
pct_label=j(1,ncol(pct_change),blankstr(20));
ind=j(nobs,ncol(pct_change),0);
ind_label=j(1,ncol(pct_change),blankstr(20));
do i=1 to ncol(pct_change);
idx=loc(^element(r[,i],{0,-6,-7,-8,-9,.}) & ^element(r[,i+1],{0,-6,-7,-8,-9,.})) ;
if ^isempty(idx) then pct_change[idx,i]=(r[idx,i]-r[idx,i+1])/r[idx,i+1];
pct_label[i]='PCT_change_'+compress(vname[i+1],,'kd')+'_'+compress(vname[i],,'kd');
idx=loc( (pct_change[,i]>0.05 & r[,i]>0.5) |
(r[,i+1]<1 & r[,i]>1) |
(r[,i+1]^=-9 & r[,i]=-9) |
(r[,i]>0.5 & element(r[,i+1],{0,-8,-9})) ) ;
if ^isempty(idx) then ind[idx,i]=1;
ind_label[i]='Ind_'+compress(vname[i+1],,'kd')+'_'+compress(vname[i],,'kd');
end;
create want from CustID r pct_change ind[c=('CustID'||vname||pct_label||ind_label)];
append from CustID r pct_change ind;
close;
quit;
Another nice illustration for Maxim 19.
As mentioned, transposing to a long format will make this easier.
I would transpose, and also change the missing value codes (-7,-8,-9) into special missing values, e.g.:
Data have;
input CustID RevRatio1-RevRatio12;
cards;
1 0.9 0.8 0.8 0.7 0.9 0.9 0.7 0.6 0.9 0.9 0.7 0.6
2 0.6 0.1 0.2 0.3 0.2 0.4 0.5 1.1 0.9 0.7 0.8 0.9
3 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 -9
4 -9 0.2 0.1 0.4 0.5 -9 -8 0.1 0.2 0.3 0.4 0.1
;
Run;
data vert (keep=CustID Seq RevRatio);
set have ;
array Ratios {*} RevRatio1-RevRatio12 ;
do Seq=1 to dim(Ratios) ;
RevRatio=Ratios{Seq} ;
*recode negative missing codes to special missing values ;
if RevRatio= -9 then RevRatio=.A ;
if RevRatio= -8 then RevRatio=.B ;
if RevRatio= -7 then RevRatio=.C ;
if RevRatio= -6 then RevRatio=.D ;
output ;
end ;
run ;
In that structure, you can use use the LAG function to see the prior month's (?) value. And then calculate pct change, etc. Something like:
data want ;
set vert ;
_lagRatio=lag(RevRatio) ;
*This calculation seems backwards for pct change ;
*I would expect (RevRatio-_lagRatio)/_lagRatio ;
if seq>1 then do ;
if (RevRatio ne 0) and (n(_lagRatio,RevRatio)=2) then PCT_change=(_lagRatio-RevRatio)/RevRatio ;
*I am not convinced below logic is correct ;
if (PCT_change>.05 and _lagRatio>.5)
or (_lagRatio>1 and RevRatio<1 ) /*This will include missing values for RevRatio! */
or (RevRatio ne .A and _lagRatio=.A)
or (_lagRatio > .5 and RevRatio IN (0,.A,.B))
then Ind=1 ;
else Ind=0 ;
end ;
run ;
*Check the logic ;
proc freq data=want ;
tables RevRatio*_lagRatio*PCT_change*Ind/missing list ;
run ;
When you have repeated / longitudinal data strung out in wide format, the macro language can help you generate lots of code. Using the array statement can make it easer to reference lots of variables with pointers / indexes. Transposing to a vertical structure lets you write less code.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.