I have panel data by firm id and fiscal quarter. I am trying to create lags of my key variables.
Unfortunately my macro is not working. Can someone plea help.
data WORK.TEST;
infile datalines dsd truncover;
input id:$6. FQTR:$6. var1:F18.4 var2:F18.4 var3:F18.4;
format var1 F18.4 var2 F18.4 var3 F18.4;
datalines4;
001000,3926,,,0.0000
001000,4018,,,
001000,4108,,,
001000,4199,,,
001000,4291,,,0.0000
001000,4383,,,
001000,4474,,,
001000,4565,,,
001000,4657,,,0.0000
001000,4749,,,
001000,4839,,,
001000,4930,,,
001000,5022,,,0.0000
001000,5114,,,
001000,5204,,,
001000,5295,,,
001000,5387,,,-0.5800
001000,5479,,,0.0000
001000,5569,,,0.0000
001000,5660,,,0.0000
001000,5752,,,0.0000
001000,5844,,,0.0000
001000,5935,,,0.0000
001000,6026,,,0.0000
001000,6118,,,0.0000
001000,6210,,,0.0000
001000,6300,,,
001000,6391,,,
001000,6483,,,
001000,6575,,,0.0000
001001,8582,,,0.0000
001001,8674,,,0.0000
001001,8766,,,0.0000
001001,8857,,,0.0000
001001,8948,,,0.0000
001001,9040,,,0.0000
001001,9132,,,0.0000
001001,9222,,,0.0000
001001,9313,,,0.4700
001001,9405,,,0.1190
001001,9497,,,0.0000
001003,8674,,,0.0000
001003,8766,,,0.0000
001003,8857,,,0.0000
001003,8948,,,0.0000
001003,9040,,,0.0000
001003,9132,,,0.0000
001003,9222,,,0.0000
001003,9313,,,0.0000
001003,9405,,,0.0000
001003,9497,,,0.0000
001003,9587,,,0.0000
001003,9678,,,0.0000
001003,9770,,,0.0000
001003,9862,,,0.0000
001003,9952,,,0.0000
001003,10043,,,0.0000
001003,10135,,,0.0000
001003,10227,,,0.0000
001003,10318,,,0.0000
001003,10409,,,0.0000
001003,10501,,,-3.5000
001003,10593,,,
001003,10683,,,
001004,4291,,,0.0000
001004,4383,,,
001004,4474,,,
001004,4565,,,
001004,4657,,,0.0000
001004,4749,,,
001004,4839,,,
001004,4930,,,
001004,5022,,,0.0000
001004,5114,,,
001004,5204,,,
001004,5295,,,
001004,5387,,,0.0000
001004,5479,,,0.0000
001004,5569,,,0.0000
001004,5660,,,0.0000
001004,5752,,,0.0000
001004,5844,,,0.0000
001004,5935,,,0.0000
001004,6026,,,0.0000
001004,6118,,,0.0000
001004,6210,,,0.0000
001004,6300,,,0.0000
001004,6391,,,0.0000
001004,6483,,,0.0000
001004,6575,,,0.0000
001004,6665,,,0.0000
001004,6756,,,0.0000
001004,6848,,,0.0000
001004,6940,,,0.0000
001004,7030,,,0.0000
001004,7121,,,0.0000
001004,7213,,,0.0000
001004,7305,,,0.0000
001004,7396,,,0.0000
001004,7487,,,0.0000
;;;;
Here is the macro:
***CREATE LAGS OFKEY VARIABLES***;
%macro lag(vars, lags);
%let m = %sysfunc(countw(&vars));
%do i=1 %to &m;
%let var = %scan(&vars,&i);
%do j=1 %to &lags;
%do;
if first.id then &var._lag&j=.;
&var._lag&j = lag&j(&var);
%end;
%end;
%end;
%mend lag;
data test;
set test;
by id;
%lag(var1 var2 var3, 1);
%lag(var1 var2 var3, 2);
run;
Hi @Agent1592
I think you are on the right track, but your code needs a few modifications.
I can't see why the use of RETAIN would be a better solution, and it is definitely not easier to code in a macro (I tried), so - based on Tom's suggestions - I think the following code will work the way you want.
There is a slight discrepancy between your macro code and your usage example, because your macro is coded to loop over lags from 1 to the specified max, but your example has two calls in the same data step with lags=1 and lags=2. The first call is unnecessary and only writes redundant code.
%macro lag(vars, lags);
%let m = %sysfunc(countw(&vars));
row + 1;
if first.id then row = 1;
%do i = 1 %to &m;
%let var = %scan(&vars,&i);
%do j = 1 %to &lags;
&var._lag&j = lag&j(&var);
if row <= &j then &var._lag&j = .;
%end;
%end;
%mend;
These two statements are in the wrong order.
if first.id then &var._lag&j=.;
&var._lag&j = lag&j(&var);
Plus if you really want to do it this way then I think you will need keep track of how many observations you have seen for this ID.
So you want to have your macro generate code that looks like:
row+1;
if first.id then row=1;
VAR1_lag1=lag1(VAR1);
VAR2_lag2=lag2(VAR2);
....
if row<=1 then call missing(VAR1_lag1,VAR2_lag1);
if row<=2 then call missing(VAR1_lag2,VAR2_lag2);
...
I am not a big fan of using the new IFN/IFC functions because they are much more confusing code than normal IF/THEN/ELSE, but it would probably be easier for the macro to generate code like:
&var._lag&j=ifn(row<=&j,.,lag&j(&var));
instead of the all of the IF statements I suggested above.
See if you can mosidy your macro to generate code to calculate the ROW variable and generate those assignment statements using IFN() function to create the new variables.
The better solution would be to just use RETAIN instead of LAG(). Perhaps someone can post an answer that does that.
Hi @Agent1592
I think you are on the right track, but your code needs a few modifications.
I can't see why the use of RETAIN would be a better solution, and it is definitely not easier to code in a macro (I tried), so - based on Tom's suggestions - I think the following code will work the way you want.
There is a slight discrepancy between your macro code and your usage example, because your macro is coded to loop over lags from 1 to the specified max, but your example has two calls in the same data step with lags=1 and lags=2. The first call is unnecessary and only writes redundant code.
%macro lag(vars, lags);
%let m = %sysfunc(countw(&vars));
row + 1;
if first.id then row = 1;
%do i = 1 %to &m;
%let var = %scan(&vars,&i);
%do j = 1 %to &lags;
&var._lag&j = lag&j(&var);
if row <= &j then &var._lag&j = .;
%end;
%end;
%mend;
You can eliminate the need for RETAIN by adding a DO loop around the SET statement.
So if you have this input:
%let nlags=3;
%let varlist=var1 var2 ;
You can generate this code.
%let nvars=%sysfunc(countw(&varlist));
data want ;
do until (last.id);
set have ;
by id;
array lags[&nvars,0:&nlags]
var1 var1_lag1-var1_lag&nlags
var2 var2_lag1-var2_lag&nlags
;
output;
do i=1 to &nvars; do j=&nlags to 1 by -1;
lags[i,j] = lags[i,j-1];
end;end;
end;
drop i j;
run;
Now the only thing that needs macro logic to generate it is the list of variables for the ARRAY statement.
array lags[&nvars,0:&nlags]
%do i=1 %to &nvars;
%let var=%scan(&varlist,&i);
&var. &var._lag1-&var.lag&nlags
%end;
;
HI,
Maybe try something like this:
data WORK.TEST;
infile datalines dsd truncover dlm = ",";
input id:$6. FQTR:$6. var1 var2 var3;
datalines4;
001000,3926,1,2,3
001000,4018,4,5,6
001000,4108,7,8,9
002000,3926,1,2,3
002000,4018,4,5,6
002000,4108,7,8,9
002000,3926,10,11,12
003000,4018,4,5,6
003000,4108,7,8,9
;;;;
run;
options mprint;
%macro lag(vars, lags);
%local m;
%let m = %sysfunc(countw(&vars));
array var&lags._[*] &vars.;
array lag&lags._[&m.] %sysfunc(tranwrd(&vars, %str( ), _lag&lags.%str( )))_lag&lags.;
if first.id then _cnt_&lags._=0; drop _cnt_&lags._;
_cnt_&lags._+1;
do _N_=1 to dim(var&lags._);
lag&lags._[_N_] = lag&lags.(var&lags._[_N_]);
end;
if _cnt_&lags._ <= &lags. then
call missing(of lag&lags._[*]);
%mend lag;
data test2;
set test;
by id;
%lag(var1 var2 var3, 1);
%lag(var1 var2 var3, 2);
run;
it uses arrays so it is easier to maintain.
all the best
Bart
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.