Hi
I have a dataset with some missing monthly dates. Please see below:
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
I want to complete all missing months in a year with all the observations of the previous month.
data want;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 02/28/2012 10107 100
1 02/28/2012 10145 250
1 02/28/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 01/31/2016 40125 1000
2 01/31/2016 31799 850
2 02/28/2016 40125 1000
2 02/28/2016 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
Please guide in this regard. Thanks.
That was an challenge .
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
data temp;
merge have have(keep=id date firstobs=2 rename=(id=_id date=_date));
run;
data want;
if _n_=1 then do;
if 0 then set have;
declare hash h(multidata:'y');
h.definekey('id','date');
h.definedata('PERMNO','SHARES');
h.definedone();
end;
set temp;
by id date;
if first.id then h.clear();
h.add();
output;
if last.date and id=_id then do;
key=date;
do i=1 to intck('month',date,_date)-1;
date=intnx('month',date,1,'e');
rc=h.find(key:id,key:key);
do while(rc=0);
output;
rc=h.find_next(key:id,key:key);
end;
end;
end;
drop _: key rc i;
run;
data want(keep=id dt permno shares);
declare hash h(dataset:"have", multidata:"Y");
h.definekey("id", "date");
h.definedata("permno", "shares");
h.definedone();
declare hash h2(multidata:"Y");
h2.definekey("id", "dt");
h2.definedata("permno", "shares");
h2.definedone();
declare hiter hi2("h2");
do until (last.id);
set have;
by id;
if first.id then _min=date;
end;
do dt=_min by 0 until (dt gt date);
if h.check(key:id, key:dt) = 0 then do;
h.reset_dup();h2.clear();
do while (h.do_over(key:id, key:dt)=0);
output;
h2.add();
end;
end;
else do;
do rc=hi2.first() by 0 while (rc=0);
output;
rc=hi2.next();
end;
end;
dt=intnx("month", dt, 1, "e");
end;
format dt mmddyy10.;
run;
Result:
id permno shares dt 1 10107 100 01/31/2012 1 10145 250 01/31/2012 1 11340 180 01/31/2012 1 10107 100 02/29/2012 1 10145 250 02/29/2012 1 11340 180 02/29/2012 1 10145 450 03/31/2012 1 25267 450 03/31/2012 1 10145 300 04/30/2012 2 40125 1000 12/31/2015 2 31799 850 12/31/2015 2 40125 1000 01/31/2016 2 31799 850 01/31/2016 2 40125 1000 02/29/2016 2 31799 850 02/29/2016 2 40125 720 03/31/2016 2 31799 800 03/31/2016
That was an challenge .
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
data temp;
merge have have(keep=id date firstobs=2 rename=(id=_id date=_date));
run;
data want;
if _n_=1 then do;
if 0 then set have;
declare hash h(multidata:'y');
h.definekey('id','date');
h.definedata('PERMNO','SHARES');
h.definedone();
end;
set temp;
by id date;
if first.id then h.clear();
h.add();
output;
if last.date and id=_id then do;
key=date;
do i=1 to intck('month',date,_date)-1;
date=intnx('month',date,1,'e');
rc=h.find(key:id,key:key);
do while(rc=0);
output;
rc=h.find_next(key:id,key:key);
end;
end;
end;
drop _: key rc i;
run;
Assuming that you want to group by ID and PERMNO it will be easier if you sort it that way.
proc sort;
by id permno date ;
run;
data want ;
set have ;
by id permno ;
set have(firstobs=2 keep=date rename=(date=next_date)) have(obs=1 drop=_all_);
if last.permno then next_date=date;
imputed=0;
do until(date >= next_date);
output;
date=intnx('month',date,1,'end');
imputed=1;
end;
drop next_date;
run;
Obs ID PERMNO DATE SHARES imputed 1 1 10107 2012-01-31 100 0 2 1 10145 2012-01-31 250 0 3 1 10145 2012-02-29 250 1 4 1 10145 2012-03-31 450 0 5 1 10145 2012-04-30 300 0 6 1 11340 2012-01-31 180 0 7 1 25267 2012-03-31 450 0 8 2 31799 2015-12-31 850 0 9 2 31799 2016-01-31 850 1 10 2 31799 2016-02-29 850 1 11 2 31799 2016-03-31 800 0 12 2 40125 2015-12-31 1000 0 13 2 40125 2016-01-31 1000 1 14 2 40125 2016-02-29 1000 1 15 2 40125 2016-03-31 720 0
Note: 2012 and 2016 are leap years. Also using either of MDY or DMY order for dates will confuse half of your audience.
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.