BookmarkSubscribeRSS Feed
☑ This topic is solved. Need further help from the community? Please sign in and ask a new question.
dac_js
Quartz | Level 8

I have data in  the following format:

 

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2014 05/21/2014 1
2 01/20/2012 05/05/2011 06/04/2011 1
2 01/20/2012 07/30/2011 08/29/2011 1
;
run;

 

Here start date and end date are the dates between which patients are taking drugs (drug = 1). Need to create observations when patients are not taking drugs (drug = 0).

The date count should start from the service day. Is there any way to arrange the data in the following format:

Observations ID Service date Start date End Date Drug Duration
1 1 9/23/2013 9/23/2013 12/3/2014 0 436
2 1 9/23/2013 12/3/2014 12/4/2014 1 1
3 1 9/23/2013 12/4/2014 5/20/2015 0 167
4 1 9/23/2013 5/20/2015 5/21/2015 1 1
5 2 1/20/2012 1/20/2012 5/5/2012 0 106
6 2 1/20/2012 5/5/2012 6/4/2012 1 30
7 2 1/20/2012 6/4/2012 7/30/2012 0 56
8 2 1/20/2012 7/30/2012 8/29/2012 1 30

 

Thanks in advance!

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

OK. Actually this question is the same as original one.

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;

View solution in original post

7 REPLIES 7
mkeintz
PROC Star

In your desired result sample, why are start_dt values equal to the previous end_dt instead instead of one day after?  Also your sample dates in the data step do not match the dates in the result table.

 

data have;
input
id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1    (was 05/20/2014 and 05/21/2014)
2 01/20/2012 05/05/2012 06/04/2012 1    (was 05/05/2011 and 06/04/2011)
2 01/20/2012 07/30/2012 08/29/2012 1    (was 07/30/2011 and 08/29/2011)
;
run;

data want (drop=_:);
  set have;
  by id service_dt;

  _start_zero=ifn(first.service_dt=1,service_dt,lag(end_dt));
  if _start_zero^=start_dt then do;
    drug=0;
    end_dt=start_dt;
    start_dt=_start_zero;
    duration=end_dt-start_dt;
    output;
  end;
  set have;   **reread the observation to restore the drug=1 date values**;
    duration=end_dt-start_dt;
  output;
run;

--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------
Ksharp
Super User
data have;
input id service_dt :mmddyy10. start_dt :mmddyy10. end_dt :mmddyy10. drug;
format service_dt mmddyy10. start_dt mmddyy10. end_dt mmddyy10. ;
cards;
1 09/23/2013 12/03/2014 12/04/2014 1
1 09/23/2013 05/20/2015 05/21/2015 1
2 01/20/2012 05/05/2012 06/04/2012 1
2 01/20/2012 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=start_dt to end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select id,min(service_dt) as min format=mmddyy10.,max(end_dt) as max format=mmddyy10.
 from have 
  group by id;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(keep=id min rename=(min=Service_date))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;
dac_js
Quartz | Level 8

Thank you for the help!

 

I have one more question. With the same data if I want to create one more row for all individuals showing the time between last day patient have drug (drug_end_dt) to the last day patient was observed (service_end_dt), should I follow the same steps? It would be very helpful if you can show where in the code I need to make this change?

The same data with one more variable is below:

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;

 

And I am trying to create a table like below. [Here patient receive service between the service_start_dt  and service_end_dt . Within this period there are times when patient can or cannot take the drug. between drug_start_dt  and drug_end_dt is thh time when patients are taking the drugs]

 

Obs

id

service_start_dt

service_end_dt

drug

duration

drug_start_dt

drug_end_dt

1

1

09/23/2013

09/23/2016

0

436

09/23/2013

12/02/2014

2

1

09/23/2013

09/23/2016

1

2

12/03/2014

12/04/2014

3

1

09/23/2013

09/23/2016

0

166

12/05/2014

05/19/2015

4

1

09/23/2013

09/23/2016

1

2

05/20/2015

05/21/2015

5

2

01/20/2012

09/23/2016

0

106

01/20/2012

05/04/2012

6

2

01/20/2012

09/23/2016

1

31

05/05/2012

06/04/2012

7

2

01/20/2012

09/23/2016

0

55

06/05/2012

07/29/2012

8

2

01/20/2012

09/23/2016

1

31

07/30/2012

08/29/2012

dac_js
Quartz | Level 8

This is the table I am trying to create:

 

Obs

id

service_start_dt

service_end_dt

drug

duration

start_date

end_date

1

1

09/23/2013

09/23/2016

0

436

09/23/2013

12/02/2014

2

1

09/23/2013

09/23/2016

1

2

12/03/2014

12/04/2014

3

1

09/23/2013

09/23/2016

0

166

12/05/2014

05/19/2015

4

1

09/23/2013

09/23/2016

1

2

05/20/2015

05/21/2015

5

1

09/23/2013

09/23/2016

0

487

05/22/2015

09/23/2016

6

2

01/20/2012

09/23/2016

0

106

01/20/2012

05/04/2012

7

2

01/20/2012

09/23/2016

1

31

05/05/2012

06/04/2012

8

2

01/20/2012

09/23/2016

0

55

06/05/2012

07/29/2012

9

2

01/20/2012

09/23/2016

1

31

07/30/2012

08/29/2012

10

2

01/20/2012

09/23/2016

0

1487

08/30/2012

09/23/2016

dac_js
Quartz | Level 8

Thank you for noticing the issue with my example data.

Ksharp
Super User

OK. Actually this question is the same as original one.

 

data have;
input id service_start_dt :mmddyy10. service_end_dt :mmddyy10. drug_start_dt :mmddyy10. drug_end_dt :mmddyy10. drug;
format service_start_dt service_end_dt mmddyy10. drug_start_dt mmddyy10. drug_end_dt mmddyy10. ;
cards;
1 09/23/2013 09/23/2016 12/03/2014 12/04/2014 1
1 09/23/2013 09/23/2016 05/20/2015 05/21/2015 1
2 01/20/2012 09/23/2016 05/05/2012 06/04/2012 1
2 01/20/2012 09/23/2016 07/30/2012 08/29/2012 1
;
run;


data have_drug;
 set have;
 do date=drug_start_dt to drug_end_dt;
  output;
 end;
format date mmddyy10.;
keep id date drug; 
run;


proc sql;
create table temp as
select distinct id,service_start_dt as min format=mmddyy10.,service_end_dt as max format=mmddyy10.
 from have ;
quit;
data temp1;
 set temp;
 do date=min to max;
  output;
 end;
format date mmddyy10.;
keep id date;
run;

proc sql;
create table all_data as
select a.*,coalesce(b.drug,0) as drug
 from temp1 as a left join have_drug as b
  on a.id=b.id and a.date=b.date 
   order by id,date;
quit;

proc summary data=all_data;
by id drug notsorted;
var date;
output out=all_data1 min=start_date max=end_date;
run;

data want;
 merge temp(rename=(min=service_start_dt max=service_end_dt))
       all_data1(drop=_type_ rename=(_freq_=duration));
 by id;
run;
dac_js
Quartz | Level 8

Thank you! 

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 7 replies
  • 1884 views
  • 0 likes
  • 3 in conversation