Solved: Re: Creating observation for days without drugs

dac_js · Posted 06-14-2022 07:52 PM

I have data in the following format:

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2014 05/21/2014 1
2 01/20/2012 05/05/2011 06/04/2011 1
2 01/20/2012 07/30/2011 08/29/2011 1
;
run;

Here start date and end date are the dates between which patients are taking drugs (drug = 1). Need to create observations when patients are not taking drugs (drug = 0).

The date count should start from the service day. Is there any way to arrange the data in the following format:

Observations	ID	Service date	Start date	End Date	Drug	Duration
1	1	9/23/2013	9/23/2013	12/3/2014	0	436
2	1	9/23/2013	12/3/2014	12/4/2014	1	1
3	1	9/23/2013	12/4/2014	5/20/2015	0	167
4	1	9/23/2013	5/20/2015	5/21/2015	1	1
5	2	1/20/2012	1/20/2012	5/5/2012	0	106
6	2	1/20/2012	5/5/2012	6/4/2012	1	30
7	2	1/20/2012	6/4/2012	7/30/2012	0	56
8	2	1/20/2012	7/30/2012	8/29/2012	1	30

Thanks in advance!

Ksharp · Posted 06-17-2022 07:56 AM

OK. Actually this question is the same as original one.

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;

View solution in original post

mkeintz · Posted 06-15-2022 06:56 AM

In your desired result sample, why are start_dt values equal to the previous end_dt instead instead of one day after? Also your sample dates in the data step do not match the dates in the result table.

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1    (was 05/20/2014 and 05/21/2014)
2 01/20/2012 05/05/2012 06/04/2012 1    (was 05/05/2011 and 06/04/2011)
2 01/20/2012 07/30/2012 08/29/2012 1    (was 07/30/2011 and 08/29/2011)
;
run;

data want (drop=_:);
  set have;
  by id service_dt;

  _start_zero=ifn(first.service_dt=1,service_dt,lag(end_dt));
  if _start_zero^=start_dt then do;
    drug=0;
    end_dt=start_dt;
    start_dt=_start_zero;
    duration=end_dt-start_dt;
    output;
  end;
  set have;   **reread the observation to restore the drug=1 date values**;
    duration=end_dt-start_dt;
  output;
run;

--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------

Ksharp · Posted 06-15-2022 07:59 AM

data have;
input id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1
2 01/20/2012 05/05/2012 06/04/2012 1
2 01/20/2012 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=start_dt to end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select id,min(service_dt) as min format=mmddyy10.,max(end_dt) as max format=mmddyy10.
 from have 
  group by id;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(keep=id min rename=(min=Service_date))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;

dac_js · Posted 06-16-2022 11:05 PM

Thank you for the help!

I have one more question. With the same data if I want to create one more row for all individuals showing the time between last day patient have drug (drug_end_dt) to the last day patient was observed (service_end_dt), should I follow the same steps? It would be very helpful if you can show where in the code I need to make this change?

The same data with one more variable is below:

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;

And I am trying to create a table like below. [Here patient receive service between the service_start_dt and service_end_dt . Within this period there are times when patient can or cannot take the drug. between drug_start_dt and drug_end_dt is thh time when patients are taking the drugs]

Obs	id	service_start_dt	service_end_dt	drug	duration	drug_start_dt	drug_end_dt
1	1	09/23/2013	09/23/2016	0	436	09/23/2013	12/02/2014
2	1	09/23/2013	09/23/2016	1	2	12/03/2014	12/04/2014
3	1	09/23/2013	09/23/2016	0	166	12/05/2014	05/19/2015
4	1	09/23/2013	09/23/2016	1	2	05/20/2015	05/21/2015
5	2	01/20/2012	09/23/2016	0	106	01/20/2012	05/04/2012
6	2	01/20/2012	09/23/2016	1	31	05/05/2012	06/04/2012
7	2	01/20/2012	09/23/2016	0	55	06/05/2012	07/29/2012
8	2	01/20/2012	09/23/2016	1	31	07/30/2012	08/29/2012

dac_js · Posted 06-16-2022 11:10 PM

This is the table I am trying to create:

Obs	id	service_start_dt	service_end_dt	drug	duration	start_date	end_date
1	1	09/23/2013	09/23/2016	0	436	09/23/2013	12/02/2014
2	1	09/23/2013	09/23/2016	1	2	12/03/2014	12/04/2014
3	1	09/23/2013	09/23/2016	0	166	12/05/2014	05/19/2015
4	1	09/23/2013	09/23/2016	1	2	05/20/2015	05/21/2015
5	1	09/23/2013	09/23/2016	0	487	05/22/2015	09/23/2016
6	2	01/20/2012	09/23/2016	0	106	01/20/2012	05/04/2012
7	2	01/20/2012	09/23/2016	1	31	05/05/2012	06/04/2012
8	2	01/20/2012	09/23/2016	0	55	06/05/2012	07/29/2012
9	2	01/20/2012	09/23/2016	1	31	07/30/2012	08/29/2012
10	2	01/20/2012	09/23/2016	0	1487	08/30/2012	09/23/2016

dac_js · Posted 06-16-2022 10:48 PM

Thank you for noticing the issue with my example data.

Ksharp · Posted 06-17-2022 07:56 AM

OK. Actually this question is the same as original one.

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;

dac_js · Posted 06-17-2022 04:31 PM

Thank you!

SAS Training: Just a Click Away