Before I saw @PGStats 's solution, I made some adaptations to what @smantha suggested. This is what I got to work, but @PGStats gave a much more elegant solution! proc means data=drugs nway missing noprint;
class id day;
var day;
output out=test n=;
run;
proc sort data=drugs out=drugs2 nodupkey;
by id regimen start end;
run;
*figure out when the number of regimens switches;
data test2;
set test;
by id;
lagfreq = lag(_freq_);
if first.id then lagfreq = .;
if _freq_ = lagfreq then delete;
run;
proc sql;
create table test3 as
select a.*, b.regimen
from test2 a
left join drugs b
on (a.id=b.id) and (a.day=b.day);
quit;
*if a given day has multiple combinations of drugs, take the most complicated combo;
data test3;
set test3;
if regimen in ("M+E", "M+A", "M+R") then combo_n = 2;
if regimen in ("M+E+R", "M+E+A", "M+R+A") then combo_n = 3;
if regimen = "M+E+R+A" then combo_n = 4;
run;
proc sql;
create table test4 as
select *, max(combo_n) as maxcombo
from test3
group by id, day
having combo_n = maxcombo;
quit;
*determine when the next regimen started;
proc sort data=test4; by id descending day; run;
data test5;
set test4;
format nextdate MMDDYY10.;
by id;
nextdate=lag(day);
if first.id then nextdate=.;
run;
proc sort data=test5; by id day; run;
*get the final end date for each person;
proc sql;
create table ends as
select id, max(end) as final format = MMDDYY10.
from drugs2
group by id
having end=final;
quit;
proc sql;
create table regimens as
select a.id, a.regimen, a.day, a.nextdate, b.final
from test5 a
left join ends b
on (a.id=b.id);
quit;
data regimens;
set regimens;
format start end MMDDYY10.;
start = day;
if nextdate = . then end = final;
else end = nextdate;
regimentime = end - start;
drop day nextdate final;
run; Thanks to both of you for your help!
... View more