I have data in the following format:
data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2014 05/21/2014 1
2 01/20/2012 05/05/2011 06/04/2011 1
2 01/20/2012 07/30/2011 08/29/2011 1
;
run;
Here start date and end date are the dates between which patients are taking drugs (drug = 1). Need to create observations when patients are not taking drugs (drug = 0).
The date count should start from the service day. Is there any way to arrange the data in the following format:
Observations | ID | Service date | Start date | End Date | Drug | Duration |
1 | 1 | 9/23/2013 | 9/23/2013 | 12/3/2014 | 0 | 436 |
2 | 1 | 9/23/2013 | 12/3/2014 | 12/4/2014 | 1 | 1 |
3 | 1 | 9/23/2013 | 12/4/2014 | 5/20/2015 | 0 | 167 |
4 | 1 | 9/23/2013 | 5/20/2015 | 5/21/2015 | 1 | 1 |
5 | 2 | 1/20/2012 | 1/20/2012 | 5/5/2012 | 0 | 106 |
6 | 2 | 1/20/2012 | 5/5/2012 | 6/4/2012 | 1 | 30 |
7 | 2 | 1/20/2012 | 6/4/2012 | 7/30/2012 | 0 | 56 |
8 | 2 | 1/20/2012 | 7/30/2012 | 8/29/2012 | 1 | 30 |
Thanks in advance!
OK. Actually this question is the same as original one.
data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;
data have_drug;
set have;
do date=drug_start_dt to drug_end_dt;
output;
end;
format date mmddyy10.;
keep id date drug;
run;
proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
from have ;
quit;
data temp1;
set temp;
do date=min to max;
output;
end;
format date mmddyy10.;
keep id date;
run;
proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
from temp1 as a left join have_drug as b
on a.id=b.id and a.date=b.date
order by id,date;
quit;
proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;
data want;
merge temp(rename=(min=service_start_dt max=service_end_dt))
all_data1(drop=_type_ rename=(_freq_=duration));
by id;
run;
In your desired result sample, why are start_dt values equal to the previous end_dt instead instead of one day after? Also your sample dates in the data step do not match the dates in the result table.
data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1 (was 05/20/2014 and 05/21/2014)
2 01/20/2012 05/05/2012 06/04/2012 1 (was 05/05/2011 and 06/04/2011)
2 01/20/2012 07/30/2012 08/29/2012 1 (was 07/30/2011 and 08/29/2011)
;
run;
data want (drop=_:);
set have;
by id service_dt;
_start_zero=ifn(first.service_dt=1,service_dt,lag(end_dt));
if _start_zero^=start_dt then do;
drug=0;
end_dt=start_dt;
start_dt=_start_zero;
duration=end_dt-start_dt;
output;
end;
set have; **reread the observation to restore the drug=1 date values**;
duration=end_dt-start_dt;
output;
run;
data have;
input id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1
2 01/20/2012 05/05/2012 06/04/2012 1
2 01/20/2012 07/30/2012 08/29/2012 1
;
run;
data have_drug;
set have;
do date=start_dt to end_dt;
output;
end;
format date mmddyy10.;
keep id date drug;
run;
proc sql;
create table temp as
select id,min(service_dt) as min format=mmddyy10.,max(end_dt) as max format=mmddyy10.
from have
group by id;
quit;
data temp1;
set temp;
do date=min to max;
output;
end;
format date mmddyy10.;
keep id date;
run;
proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
from temp1 as a left join have_drug as b
on a.id=b.id and a.date=b.date
order by id,date;
quit;
proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;
data want;
merge temp(keep=id min rename=(min=Service_date))
all_data1(drop=_type_ rename=(_freq_=duration));
by id;
run;
Thank you for the help!
I have one more question. With the same data if I want to create one more row for all individuals showing the time between last day patient have drug (drug_end_dt) to the last day patient was observed (service_end_dt), should I follow the same steps? It would be very helpful if you can show where in the code I need to make this change?
The same data with one more variable is below:
data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;
And I am trying to create a table like below. [Here patient receive service between the service_start_dt and service_end_dt . Within this period there are times when patient can or cannot take the drug. between drug_start_dt and drug_end_dt is thh time when patients are taking the drugs]
Obs |
id |
service_start_dt |
service_end_dt |
drug |
duration |
drug_start_dt |
drug_end_dt |
1 |
1 |
09/23/2013 |
09/23/2016 |
0 |
436 |
09/23/2013 |
12/02/2014 |
2 |
1 |
09/23/2013 |
09/23/2016 |
1 |
2 |
12/03/2014 |
12/04/2014 |
3 |
1 |
09/23/2013 |
09/23/2016 |
0 |
166 |
12/05/2014 |
05/19/2015 |
4 |
1 |
09/23/2013 |
09/23/2016 |
1 |
2 |
05/20/2015 |
05/21/2015 |
5 |
2 |
01/20/2012 |
09/23/2016 |
0 |
106 |
01/20/2012 |
05/04/2012 |
6 |
2 |
01/20/2012 |
09/23/2016 |
1 |
31 |
05/05/2012 |
06/04/2012 |
7 |
2 |
01/20/2012 |
09/23/2016 |
0 |
55 |
06/05/2012 |
07/29/2012 |
8 |
2 |
01/20/2012 |
09/23/2016 |
1 |
31 |
07/30/2012 |
08/29/2012 |
This is the table I am trying to create:
Obs |
id |
service_start_dt |
service_end_dt |
drug |
duration |
start_date |
end_date |
1 |
1 |
09/23/2013 |
09/23/2016 |
0 |
436 |
09/23/2013 |
12/02/2014 |
2 |
1 |
09/23/2013 |
09/23/2016 |
1 |
2 |
12/03/2014 |
12/04/2014 |
3 |
1 |
09/23/2013 |
09/23/2016 |
0 |
166 |
12/05/2014 |
05/19/2015 |
4 |
1 |
09/23/2013 |
09/23/2016 |
1 |
2 |
05/20/2015 |
05/21/2015 |
5 |
1 |
09/23/2013 |
09/23/2016 |
0 |
487 |
05/22/2015 |
09/23/2016 |
6 |
2 |
01/20/2012 |
09/23/2016 |
0 |
106 |
01/20/2012 |
05/04/2012 |
7 |
2 |
01/20/2012 |
09/23/2016 |
1 |
31 |
05/05/2012 |
06/04/2012 |
8 |
2 |
01/20/2012 |
09/23/2016 |
0 |
55 |
06/05/2012 |
07/29/2012 |
9 |
2 |
01/20/2012 |
09/23/2016 |
1 |
31 |
07/30/2012 |
08/29/2012 |
10 |
2 |
01/20/2012 |
09/23/2016 |
0 |
1487 |
08/30/2012 |
09/23/2016 |
Thank you for noticing the issue with my example data.
OK. Actually this question is the same as original one.
data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;
data have_drug;
set have;
do date=drug_start_dt to drug_end_dt;
output;
end;
format date mmddyy10.;
keep id date drug;
run;
proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
from have ;
quit;
data temp1;
set temp;
do date=min to max;
output;
end;
format date mmddyy10.;
keep id date;
run;
proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
from temp1 as a left join have_drug as b
on a.id=b.id and a.date=b.date
order by id,date;
quit;
proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;
data want;
merge temp(rename=(min=service_start_dt max=service_end_dt))
all_data1(drop=_type_ rename=(_freq_=duration));
by id;
run;
Thank you!
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.