HI all,
I currently have a dataset that has
data have;
input enrolid year enroll_start_dt enroll_end_dt; /* Don't know how to format the dates -- any advice?? */
datalines;
1234 2010 01-JAN-2010 31-DEC-2010
1234 2011 01-JAN-2011 7-SEP-2011
5678 2010 01-JAN-2010 8-AUG-2010
4829 2010 01-JAN-2010 31-DEC-2010
4829 2011 01-JAN-2011 31-DEC-2011
4829 2012 01-JAN-2012 31-DEC-2012
4829 2013 01-JAN-2013 31-DEC-2013
;
And I want to get this:
data want;
input enrolid year enroll_start_dt enroll_end_dt overall_start overall_end overall_days;
datalines;
1234 2010 01-JAN-2010 31-DEC-2010 01-JAN-2010 7-SEP-2011 614
1234 2011 01-JAN-2011 7-SEP-2011 01-JAN-2010 7-SEP-2011 614
5678 2010 01-JAN-2010 8-AUG-2010 01-JAN-2010 8-AUG-2010 219
4829 2010 01-JAN-2010 31-DEC-2010 01-JAN-2010 31-DEC-2013 1095
4829 2011 01-JAN-2011 31-DEC-2011 01-JAN-2010 31-DEC-2013 1095
4829 2012 01-JAN-2012 31-DEC-2012 01-JAN-2010 31-DEC-2013 1095
4829 2013 01-JAN-2013 31-DEC-2013 01-JAN-2010 31-DEC-2013 1095
;
But with the current code I'm running I'm getting something funky ... that looks almost like the above but I am not able to retain all of the original observations corresponding to each unique enrolid - enroll_start_dt - enroll_end_dt combination...
My current code is as follows:
data want;
set have;
by enrolid enroll_start_dt enroll_end_dt;
retain enroll_start_dt enroll_end_dt overall_start overall_end overall_days;
format overall_start overall_end date11.;
if first.enrolid and first.enroll_start_dt and first.enroll_end_dt then do;
overall_start=enroll_start_dt;
overall_end=enroll_end_dt;
overall_days=overall_end-overall_start;
end;
else if enroll_start_dt>overall_end+1 then do;
output;
overall_start=enroll_start_dt;
overall_end=enroll_end_dt;
overall_days=overall_end-overall_start;
end;
else if enroll_start_dt<=overall_end+1 then do;
overall_start=min(overall_start,enroll_start_dt);
overall_end = max(overall_end, enroll_end_dt);
overall_days=overall_end-overall_start;
end;
if last.enrolid then output;
run;
Here a solution approach using SAS SQL flavor:
data have;
input enrolid year (enroll_start_dt enroll_end_dt) (:date11.);
format enroll_start_dt enroll_end_dt date11.;
datalines;
1234 2010 01-JAN-2010 31-DEC-2010
1234 2011 01-JAN-2011 7-SEP-2011
5678 2010 01-JAN-2010 8-AUG-2010
4829 2010 01-JAN-2010 31-DEC-2010
4829 2011 01-JAN-2011 31-DEC-2011
4829 2012 01-JAN-2012 31-DEC-2012
4829 2013 01-JAN-2013 31-DEC-2013
;
run;
proc sql;
create table want as
select
*
,min(enroll_start_dt) as overall_start format=date11.
,max(enroll_end_dt) as overall_end format=date11.
,(calculated overall_end - calculated overall_start) as overall_days
from
have
group by enrolid
;
run;
data have;
input enrolid year (enroll_start_dt enroll_end_dt) (:date9.);
format enroll_start_dt enroll_end_dt date9.;
datalines;
1234 2010 01-JAN-2010 31-DEC-2010
1234 2011 01-JAN-2011 7-SEP-2011
5678 2010 01-JAN-2010 8-AUG-2010
4829 2010 01-JAN-2010 31-DEC-2010
4829 2011 01-JAN-2011 31-DEC-2011
4829 2012 01-JAN-2012 31-DEC-2012
4829 2013 01-JAN-2013 31-DEC-2013
;
data want;
do until(last.enrolid);
set have;
by enrolid notsorted;
if first.enrolid then overall_start =enroll_start_dt;
if last.enrolid then do;
overall_end =enroll_end_dt;
overall_days=intck('days', overall_start,overall_end);
end;
end;
do until(last.enrolid);
set have;
by enrolid notsorted;
output;
end;
format overall_start overall_end date9.;
run;
Here a solution approach using SAS SQL flavor:
data have;
input enrolid year (enroll_start_dt enroll_end_dt) (:date11.);
format enroll_start_dt enroll_end_dt date11.;
datalines;
1234 2010 01-JAN-2010 31-DEC-2010
1234 2011 01-JAN-2011 7-SEP-2011
5678 2010 01-JAN-2010 8-AUG-2010
4829 2010 01-JAN-2010 31-DEC-2010
4829 2011 01-JAN-2011 31-DEC-2011
4829 2012 01-JAN-2012 31-DEC-2012
4829 2013 01-JAN-2013 31-DEC-2013
;
run;
proc sql;
create table want as
select
*
,min(enroll_start_dt) as overall_start format=date11.
,max(enroll_end_dt) as overall_end format=date11.
,(calculated overall_end - calculated overall_start) as overall_days
from
have
group by enrolid
;
run;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.