BookmarkSubscribeRSS Feed
☑ This topic is solved. Need further help from the community? Please sign in and ask a new question.
dac_js
Quartz | Level 8

I have data in  the following format:

 

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2014 05/21/2014 1
2 01/20/2012 05/05/2011 06/04/2011 1
2 01/20/2012 07/30/2011 08/29/2011 1
;
run;

 

Here start date and end date are the dates between which patients are taking drugs (drug = 1). Need to create observations when patients are not taking drugs (drug = 0).

The date count should start from the service day. Is there any way to arrange the data in the following format:

Observations ID Service date Start date End Date Drug Duration
1 1 9/23/2013 9/23/2013 12/3/2014 0 436
2 1 9/23/2013 12/3/2014 12/4/2014 1 1
3 1 9/23/2013 12/4/2014 5/20/2015 0 167
4 1 9/23/2013 5/20/2015 5/21/2015 1 1
5 2 1/20/2012 1/20/2012 5/5/2012 0 106
6 2 1/20/2012 5/5/2012 6/4/2012 1 30
7 2 1/20/2012 6/4/2012 7/30/2012 0 56
8 2 1/20/2012 7/30/2012 8/29/2012 1 30

 

Thanks in advance!

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

OK. Actually this question is the same as original one.

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;

View solution in original post

7 REPLIES 7
mkeintz
PROC Star

In your desired result sample, why are start_dt values equal to the previous end_dt instead instead of one day after?  Also your sample dates in the data step do not match the dates in the result table.

 

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1    (was 05/20/2014 and 05/21/2014)
2 01/20/2012 05/05/2012 06/04/2012 1    (was 05/05/2011 and 06/04/2011)
2 01/20/2012 07/30/2012 08/29/2012 1    (was 07/30/2011 and 08/29/2011)
;
run;

data want (drop=_:);
  set have;
  by id service_dt;

  _start_zero=ifn(first.service_dt=1,service_dt,lag(end_dt));
  if _start_zero^=start_dt then do;
    drug=0;
    end_dt=start_dt;
    start_dt=_start_zero;
    duration=end_dt-start_dt;
    output;
  end;
  set have;   **reread the observation to restore the drug=1 date values**;
    duration=end_dt-start_dt;
  output;
run;

--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------
Ksharp
Super User
data have;
input id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1
2 01/20/2012 05/05/2012 06/04/2012 1
2 01/20/2012 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=start_dt to end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select id,min(service_dt) as min format=mmddyy10.,max(end_dt) as max format=mmddyy10.
 from have 
  group by id;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(keep=id min rename=(min=Service_date))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;
dac_js
Quartz | Level 8

Thank you for the help!

 

I have one more question. With the same data if I want to create one more row for all individuals showing the time between last day patient have drug (drug_end_dt) to the last day patient was observed (service_end_dt), should I follow the same steps? It would be very helpful if you can show where in the code I need to make this change?

The same data with one more variable is below:

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;

 

And I am trying to create a table like below. [Here patient receive service between the service_start_dt  and service_end_dt . Within this period there are times when patient can or cannot take the drug. between drug_start_dt  and drug_end_dt is thh time when patients are taking the drugs]

 

Obs

id

service_start_dt

service_end_dt

drug

duration

drug_start_dt

drug_end_dt

1

1

09/23/2013

09/23/2016

0

436

09/23/2013

12/02/2014

2

1

09/23/2013

09/23/2016

1

2

12/03/2014

12/04/2014

3

1

09/23/2013

09/23/2016

0

166

12/05/2014

05/19/2015

4

1

09/23/2013

09/23/2016

1

2

05/20/2015

05/21/2015

5

2

01/20/2012

09/23/2016

0

106

01/20/2012

05/04/2012

6

2

01/20/2012

09/23/2016

1

31

05/05/2012

06/04/2012

7

2

01/20/2012

09/23/2016

0

55

06/05/2012

07/29/2012

8

2

01/20/2012

09/23/2016

1

31

07/30/2012

08/29/2012

dac_js
Quartz | Level 8

This is the table I am trying to create:

 

Obs

id

service_start_dt

service_end_dt

drug

duration

start_date

end_date

1

1

09/23/2013

09/23/2016

0

436

09/23/2013

12/02/2014

2

1

09/23/2013

09/23/2016

1

2

12/03/2014

12/04/2014

3

1

09/23/2013

09/23/2016

0

166

12/05/2014

05/19/2015

4

1

09/23/2013

09/23/2016

1

2

05/20/2015

05/21/2015

5

1

09/23/2013

09/23/2016

0

487

05/22/2015

09/23/2016

6

2

01/20/2012

09/23/2016

0

106

01/20/2012

05/04/2012

7

2

01/20/2012

09/23/2016

1

31

05/05/2012

06/04/2012

8

2

01/20/2012

09/23/2016

0

55

06/05/2012

07/29/2012

9

2

01/20/2012

09/23/2016

1

31

07/30/2012

08/29/2012

10

2

01/20/2012

09/23/2016

0

1487

08/30/2012

09/23/2016

dac_js
Quartz | Level 8

Thank you for noticing the issue with my example data.

Ksharp
Super User

OK. Actually this question is the same as original one.

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;
dac_js
Quartz | Level 8

Thank you! 

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 7 replies
  • 1980 views
  • 0 likes
  • 3 in conversation