Hi
I have a dataset with some missing monthly dates. Please see below:
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
I want to complete all missing months in a year with all the observations of the previous month.
data want;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 02/28/2012 10107 100
1 02/28/2012 10145 250
1 02/28/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 01/31/2016 40125 1000
2 01/31/2016 31799 850
2 02/28/2016 40125 1000
2 02/28/2016 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
Please guide in this regard. Thanks.
That was an challenge .
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
data temp;
merge have have(keep=id date firstobs=2 rename=(id=_id date=_date));
run;
data want;
if _n_=1 then do;
if 0 then set have;
declare hash h(multidata:'y');
h.definekey('id','date');
h.definedata('PERMNO','SHARES');
h.definedone();
end;
set temp;
by id date;
if first.id then h.clear();
h.add();
output;
if last.date and id=_id then do;
key=date;
do i=1 to intck('month',date,_date)-1;
date=intnx('month',date,1,'e');
rc=h.find(key:id,key:key);
do while(rc=0);
output;
rc=h.find_next(key:id,key:key);
end;
end;
end;
drop _: key rc i;
run;
data want(keep=id dt permno shares);
declare hash h(dataset:"have", multidata:"Y");
h.definekey("id", "date");
h.definedata("permno", "shares");
h.definedone();
declare hash h2(multidata:"Y");
h2.definekey("id", "dt");
h2.definedata("permno", "shares");
h2.definedone();
declare hiter hi2("h2");
do until (last.id);
set have;
by id;
if first.id then _min=date;
end;
do dt=_min by 0 until (dt gt date);
if h.check(key:id, key:dt) = 0 then do;
h.reset_dup();h2.clear();
do while (h.do_over(key:id, key:dt)=0);
output;
h2.add();
end;
end;
else do;
do rc=hi2.first() by 0 while (rc=0);
output;
rc=hi2.next();
end;
end;
dt=intnx("month", dt, 1, "e");
end;
format dt mmddyy10.;
run;
Result:
id permno shares dt 1 10107 100 01/31/2012 1 10145 250 01/31/2012 1 11340 180 01/31/2012 1 10107 100 02/29/2012 1 10145 250 02/29/2012 1 11340 180 02/29/2012 1 10145 450 03/31/2012 1 25267 450 03/31/2012 1 10145 300 04/30/2012 2 40125 1000 12/31/2015 2 31799 850 12/31/2015 2 40125 1000 01/31/2016 2 31799 850 01/31/2016 2 40125 1000 02/29/2016 2 31799 850 02/29/2016 2 40125 720 03/31/2016 2 31799 800 03/31/2016
That was an challenge .
data have;
input ID DATE: MMDDYY10. PERMNO SHARES;
format DATE MMDDYY10.;
datalines;
1 01/31/2012 10107 100
1 01/31/2012 10145 250
1 01/31/2012 11340 180
1 03/31/2012 10145 450
1 03/31/2012 25267 450
1 04/30/2012 10145 300
2 12/31/2015 40125 1000
2 12/31/2015 31799 850
2 03/31/2016 40125 720
2 03/31/2016 31799 800
;
run;
data temp;
merge have have(keep=id date firstobs=2 rename=(id=_id date=_date));
run;
data want;
if _n_=1 then do;
if 0 then set have;
declare hash h(multidata:'y');
h.definekey('id','date');
h.definedata('PERMNO','SHARES');
h.definedone();
end;
set temp;
by id date;
if first.id then h.clear();
h.add();
output;
if last.date and id=_id then do;
key=date;
do i=1 to intck('month',date,_date)-1;
date=intnx('month',date,1,'e');
rc=h.find(key:id,key:key);
do while(rc=0);
output;
rc=h.find_next(key:id,key:key);
end;
end;
end;
drop _: key rc i;
run;
Assuming that you want to group by ID and PERMNO it will be easier if you sort it that way.
proc sort;
by id permno date ;
run;
data want ;
set have ;
by id permno ;
set have(firstobs=2 keep=date rename=(date=next_date)) have(obs=1 drop=_all_);
if last.permno then next_date=date;
imputed=0;
do until(date >= next_date);
output;
date=intnx('month',date,1,'end');
imputed=1;
end;
drop next_date;
run;
Obs ID PERMNO DATE SHARES imputed 1 1 10107 2012-01-31 100 0 2 1 10145 2012-01-31 250 0 3 1 10145 2012-02-29 250 1 4 1 10145 2012-03-31 450 0 5 1 10145 2012-04-30 300 0 6 1 11340 2012-01-31 180 0 7 1 25267 2012-03-31 450 0 8 2 31799 2015-12-31 850 0 9 2 31799 2016-01-31 850 1 10 2 31799 2016-02-29 850 1 11 2 31799 2016-03-31 800 0 12 2 40125 2015-12-31 1000 0 13 2 40125 2016-01-31 1000 1 14 2 40125 2016-02-29 1000 1 15 2 40125 2016-03-31 720 0
Note: 2012 and 2016 are leap years. Also using either of MDY or DMY order for dates will confuse half of your audience.
Available on demand!
Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.