I am trying to calculate some statistics for a given variable based on the client's id and the time horizon. My current solution is show below, however, I would like to know if there is a way to reformat the code into a datastep instead of an sql join, because the join requires an excessive amount of time to execute on my real dataset.
data have1(drop=t);
id = 1;
dt = '31dec2020'd;
do t=1 to 10;
dt = dt + 1;
var = rand('uniform');
output;
end;
format dt ddmmyyp10.;
run;
data have2(drop=t);
id = 2;
dt = '31dec2020'd;
do t=1 to 10;
dt = dt + 1;
var = rand('uniform');
output;
end;
format dt ddmmyyp10.;
run;
data have_fin;
set have1 have2;
run;
Proc sql;
create table want1 as
select a.id, a.dt,a.var, mean(b.var) as mean_var_3d
from have_fin as a
left join have_fin as b
on a.id = b.id and intnx('day',a.dt,-3,'S') < b.dt <= a.dt
group by 1,2,3;
Quit;
Proc sql;
create table want2 as
select a.id, a.dt,a.var, mean(b.var) as mean_var_3d
from have_fin as a
left join have_fin as b
on a.id = b.id and intnx('day',a.dt,-6,'S') < b.dt <= a.dt
group by 1,2,3;
Quit;