I changed the date variables from character to numeric SAS dates.
Feel free to ask.
data DB1;
input ID :$20. (Start End)(:date9.) Hospital :$20.;
format Start End date9.;
cards;
0001 01JAN2015 06FEB2016 C
0001 07FEB2016 25APR2016 C
0001 26APR2016 31DEC2019 D
0002 01JAN2017 12JUL2017 C
0002 13JUL2017 31DEC2019 D
0002 01JAN2020 31DEC2020 D
;
data DB2;
input ID :$20. (Start End)(:date9.);
format Start End date9.;
cards;
0001 28JAN2016 08FEB2016
0001 24APR2016 28APR2016
0001 28NOV2019 13DEC2019
0002 11JUL2017 14JUL2017
0002 15JUL2017 01OCT2019
0002 14AUG2020 13SEP2020
;
data DB11(drop = s);
set DB1;
by ID Hospital notsorted;
if first.Hospital then s = Start;
if last.Hospital then do;
Start = s;
output;
end;
retain s;
run;
data want(keep = ID Start End Hospital);
if _N_ = 1 then do;
dcl hash h(dataset : 'DB11(rename = (start = s end = e))', multidata : 'Y');
h.definekey('ID');
h.definedata('Hospital', 's', 'e');
h.definedone();
end;
set DB2;
if 0 then set DB1(rename = (start = s end = e));
call missing(s, e, hospital);
do while (h.do_over() = 0);
if d = 1 then start = s ;
if s <= Start <= e & End > e then do;
d = 1;
_end = end;
end = e;
output;
end = _end;
end;
else do;
d = 0;
if s <= Start <= e then output;
end;
end;
run;
Result:
ID Start End Hospital
0001 28JAN2016 08FEB2016 C
0001 24APR2016 25APR2016 C
0001 26APR2016 28APR2016 D
0001 28NOV2019 13DEC2019 D
0002 11JUL2017 12JUL2017 C
0002 13JUL2017 14JUL2017 D
0002 15JUL2017 01OCT2019 D
0002 14AUG2020 13SEP2020 D