proc sql; create table counter as select cell,date,sales, (case when sales is not null then count(sales) else 0 end) as cnt from sample group by cell order by cell,date; quit; data full_histor to_impute; set counter; by cell; if cnt=24 then output full_histor; else output work.to_impute; run; /* create new column to flag those with 22 and those wih 23 months historicity */ /* 22*22=482 */ proc sql; create table counter22_23 as select *,(case when sum(cnt)=484 then 2 else 1 end) as cnt2 from to_impute group by cell order by cell,date; quit; /* Split into two datasets-those with 22 and those with 23 months histor. */ data impute22 impute23; set counter22_23; if cnt2=2 then output impute22; else output impute23; run; /* We want to create flag=1,2 for 23 months historicity */ /* Step1:we create a col flag with 0 or 1 if there is missing in sales */ data test1(drop= cnt2) ; set impute23; by cell date; if sales =. then flag=1; else flag=0; run; /*Split into two dataset those without sales and those full with values in sales */ data test2 test4; set test1; if cnt=0 then output test2; else output test4; run; /* Flag= 1 or 2 based on missing sales position */ data test3; set test2; by cell; if first.cell then flag=1; else flag=2; run; /* Final table with the flag= 0 or 1 based on the position of the missing sales*/ data flag_23; set test3 test4; by cell date; run; /* ******** */ /* The same idea for 22m historicity to create flag_22 */ /* impute 22 months */ data test1(drop= cnt2) ; set impute22; by cell date; if sales =. then flag=1; else flag=0; run; data test2 test4; set test1; if cnt=0 then output test2; else output test4; run; data test3; set test2; by cell; if first.cell then flag=1; else flag=2; run; data flag_22; set test3 test4; by cell date; run; /* Create 6 columns with leads,lags to fill up 22 months historicity */ proc expand data= flag_22 out= f22 (drop= cnt ) method=none; by cell; convert sales = lag1 /transformout = (lag 1); convert sales = lag2 /transformout = (lag 2); convert sales = lag3 /transformout = (lag 3); convert sales; run; proc expand data= f22 out= f22 method=none; by cell; convert sales = lead1 /transformout = (lead 1); convert sales = lead2 /transformout = (lead 2); convert sales = lead3 /transformout = (lead 3); convert sales; run; data a1; set f22; if (sales=. and flag=2 and (lag1 ne . and lag2 ne . and lag3 ne .)) then sales= mean(lag1,lag2,lag3); if (sales=. and flag =1 and (lag1 ne . and lag2 ne . and lag3 ne .)) then sales=mean(lag1,lag2,lag3); run; data t3 (drop= lead1 lead2 lead3 lag1 lag2 lag3 time ); set a1; run; /* We dropped the columns and we re-run */ proc expand data= t3 out= t1b method=none; by cell; convert sales = lag1 /transformout = (lag 1); convert sales = lag2 /transformout = (lag 2); convert sales = lag3 /transformout = (lag 3); convert sales; run; proc expand data=t1b out=t1b method=none; by cell; convert sales = lead1 /transformout = (lead 1); convert sales = lead2 /transformout = (lead 2); convert sales = lead3 /transformout = (lead 3); convert sales; run; data final ; set t1b; if (sales=. and flag=2 and (lag1 ne . and lag2 ne . and lag3 ne .)) then sales= mean(lag1,lag2,lag3); if (sales=. and flag=2 and (lead1 ne . and lead2 ne . and lead3 ne .)) then sales= mean(lead1,lead2,lead3); if (sales=. and flag=1 and (lead1 ne . and lead2 ne . and lead3 ne .)) then sales=mean(lead1,lead2,lead3); run; data a4 (drop= lead1 lead2 lead3 lag1 lag2 lag3 time ); set final; run; /* split full with 23months*/ proc sql; create table a as select cell,date,sales,flag, (case when sales is not null then count(sales) else 0 end) as cnt from a4 group by cell order by cell,date; quit; /* *********************** */ cnt=24 or cnt=23 or 0 to_impute /* to impute 201811 */ /* from 22months historicity to 23m. and we merge in one table with 23m histor from above */ data full_impute22_23 (drop = cnt ); set a4 flag_23; run; proc sql; create table a as select cell,date,sales,flag, (case when sales is not null then count(sales) else 0 end) as cnt from full_impute22_23 group by cell order by cell,date; quit; /* split into two datasets-the one(full_histor)=24months historicity and to_impute=those with 22,23 months historicity */ data full_histor2 to_impute2; set work.a; by cell; if cnt=24 then output full_histor2; else output work.to_impute2; run; proc expand data= to_impute2 out= ff (drop= cnt ) method=none ; by cell; convert sales = lag1 /transformout = (lag 1); convert sales = lag2 /transformout = (lag 2); convert sales = lag3 /transformout = (lag 3); convert sales; run; proc expand data=ff out=ff method=none; by cell; convert sales = lead1 /transformout = (lead 1); convert sales = lead2 /transformout = (lead 2); convert sales = lead3 /transformout = (lead 3); convert sales; run; data a1; set ff; if (sales=. and flag=2 and (lag1 ne . and lag2 ne . and lag3 ne .)) then sales= mean(lag1,lag2,lag3); if (sales=. and flag =2 and (lead1 ne . and lead2 ne . and lead3 ne .)) then sales=mean(lead1,lead2,lead3); if (sales=. and flag=1 and (lead1 ne . and lead2 ne . and lead3 ne .)) then sales=mean(lead1,lead2,lead3); run; data t3 (drop= lead1 lead2 lead3 lag1 lag2 lag3 time ); set a1; run; /* We dropped cols and we re-run */ proc expand data= t3 out= t1b method=none; by cell; convert sales = lag1 /transformout = (lag 1); convert sales = lag2 /transformout = (lag 2); convert sales = lag3 /transformout = (lag 3); convert sales; run; proc expand data= t1b out=t1b method=none; by cell; convert sales = lead1 /transformout = (lead 1); convert sales = lead2 /transformout = (lead 2); convert sales = lead3 /transformout = (lead 3); convert sales; run; /* Fill up with the mean */ data final ; set t1b; if (sales=. and flag=1 and (lag1 ne . and lag2 ne . and lag3 ne .)) then sales= mean(lag1,lag2,lag3); run; data final2(drop=lag1 lag2 lag3 lead1 lead2 lead3 flag time); set final; run; /* Drop the unneccessary cols */ data full_histor(drop=cnt); set full_histor; run; data full_histor2(drop=cnt flag); set full_histor2; run; /* Combine with the 24months historicity */ data imputed_final; set full_histor full_histor2 final2 ; run;