Solved: first date with label

lillymaginta · Posted 05-16-2020 01:13 PM

data test;
input id$2. start drug;
attrib start format =date9. informat=date9.;

datalines;
1 01JAN2015 1
1 18FEB2015  2
2 01jan2016  2
2 01apr2016  1
3 01JAN2015 1
3 01JAN2015  2
4 01jan2017  2
4 01apr2017  2
;
run;

I am trying to find the first of each drug per id. Retain the first as "first" and indicate the drug name under "drug". And return the start of the other drug as "comp". If both drugs started the same day then output "N".

output

id first comp drug

1 01JAN2015 18FEB2015 1

2 01JAN2016 01apr2016 2

3 01JAN2015 01JAN2015 N

4 01jan2017 . 2

ed_sas_member · Posted 05-17-2020 05:43 AM

Hi @lillymaginta

Here is an attempt to achieve this. Please review the way to compute COMP date.

From your explanations, I understand that there are 3 cases:

CASE1: the subject takes only one drug (id:4) -> we consider that comp=. has it it is ongoing
CASE2: the subject takes multiples drugs all at the same date and no next drug (id:3) -> we consider that comp= start
CASE3: the subject takes one or more drugs following the initial one (id:1, 2) -> we consider that comp= first intake date of the next drug

Best,

data test;
	input id$2. start drug;
	attrib start format=date9. informat=date9.;
	datalines;
1 01JAN2015 1
1 18FEB2015  2
2 01jan2016  2
2 01apr2016  1
3 01JAN2015 1
3 01JAN2015  2
4 01jan2017  2
4 01apr2017  2
;
run;

proc sort data=test out=test_sorted;
	by id start;
run;

/*** If both initial drugs started the same day then output "N" */

data test2;
	set test_sorted;
	by id start;
	
	if first.start then drugc = put(drug, 1.);
	else drugc = "N";
	
	drop drug;
	rename drugc=drug;
	
	if last.start then output;
run;

/*** Flag first drug and first drug intake */

data test3;
	set test2;
	by id start;	
	if first.id then flag_drug1_intake1=1;
	else flag_drug1_intake1=0;
run;

proc sql;
	create table test4 as
	select a.id, a.start, a.drug, case when b.flag_drug1_intake1 then 1 else 0 end as flag_drug1, a.flag_drug1_intake1
	from test3 as a
		 left join
		 (select * from test3 where flag_drug1_intake1=1) as b
	on a.id = b.id and a.drug = b.drug
	order by a.id, a.start;
quit;

/*** Identify COMP date */
	
	/*** CASE1: ID with ongoing first 'unique' drug
		 COMP = .*/
	
	proc sql;
		create table ongoing_unique as
		select distinct id, . as comp
		from test4
		group by id
		having count(distinct flag_drug1)=1 and drug ne "N";
	quit;

	/*** CASE2: ID with ongoing first 'multiples' drug
		 COMP = start */
	proc sql;
		create table ongoing_multiple as
		select distinct id, start as comp
		from test4
		group by id
		having count(distinct flag_drug1)=1 and drug="N";
	quit;

	/*** CASE3: ID with other drug(s) following the initial one
		 COMP = first intake date of the next drug */
	proc sql;
		create table ID_other as
		select *
		from test4
		group by id
		having count(distinct flag_drug1)>1
		order by ID, flag_drug1, start;
	quit;
		/* Retrieve COMP date */
		data other;
			set ID_other;
			by ID flag_drug1 start ;
			if first.flag_drug1 and flag_drug1 ne 1;
			keep ID start;
			rename start = comp;
		run;

/*** Merge all couples ID - COMP */
data want_comp;
	merge ongoing_unique ongoing_multiple other;
	by ID comp;
run;

/*** FINAL MERGE */
data want;
	merge test4 (where=(flag_drug1_intake1=1)) want_comp;
	by id;
	drop flag:;
run;

Output:

View solution in original post

PaigeMiller · Posted 05-16-2020 04:01 PM

If there are always two records for each ID then PROC SUMMARY ... wait, I don't want to assume ... are there always going to be 2 records for each ID?

Why is there no COMP for ID = 4?

--
Paige Miller

lillymaginta · Posted 05-19-2020 10:56 AM

Hi PaigeMiller, there are plenty of records 30+, I just listed two to simplify the idea

ed_sas_member · Posted 05-17-2020 05:43 AM

Hi @lillymaginta

Here is an attempt to achieve this. Please review the way to compute COMP date.

From your explanations, I understand that there are 3 cases:

CASE1: the subject takes only one drug (id:4) -> we consider that comp=. has it it is ongoing
CASE2: the subject takes multiples drugs all at the same date and no next drug (id:3) -> we consider that comp= start
CASE3: the subject takes one or more drugs following the initial one (id:1, 2) -> we consider that comp= first intake date of the next drug

Best,

data test;
	input id$2. start drug;
	attrib start format=date9. informat=date9.;
	datalines;
1 01JAN2015 1
1 18FEB2015  2
2 01jan2016  2
2 01apr2016  1
3 01JAN2015 1
3 01JAN2015  2
4 01jan2017  2
4 01apr2017  2
;
run;

proc sort data=test out=test_sorted;
	by id start;
run;

/*** If both initial drugs started the same day then output "N" */

data test2;
	set test_sorted;
	by id start;
	
	if first.start then drugc = put(drug, 1.);
	else drugc = "N";
	
	drop drug;
	rename drugc=drug;
	
	if last.start then output;
run;

/*** Flag first drug and first drug intake */

data test3;
	set test2;
	by id start;	
	if first.id then flag_drug1_intake1=1;
	else flag_drug1_intake1=0;
run;

proc sql;
	create table test4 as
	select a.id, a.start, a.drug, case when b.flag_drug1_intake1 then 1 else 0 end as flag_drug1, a.flag_drug1_intake1
	from test3 as a
		 left join
		 (select * from test3 where flag_drug1_intake1=1) as b
	on a.id = b.id and a.drug = b.drug
	order by a.id, a.start;
quit;

/*** Identify COMP date */
	
	/*** CASE1: ID with ongoing first 'unique' drug
		 COMP = .*/
	
	proc sql;
		create table ongoing_unique as
		select distinct id, . as comp
		from test4
		group by id
		having count(distinct flag_drug1)=1 and drug ne "N";
	quit;

	/*** CASE2: ID with ongoing first 'multiples' drug
		 COMP = start */
	proc sql;
		create table ongoing_multiple as
		select distinct id, start as comp
		from test4
		group by id
		having count(distinct flag_drug1)=1 and drug="N";
	quit;

	/*** CASE3: ID with other drug(s) following the initial one
		 COMP = first intake date of the next drug */
	proc sql;
		create table ID_other as
		select *
		from test4
		group by id
		having count(distinct flag_drug1)>1
		order by ID, flag_drug1, start;
	quit;
		/* Retrieve COMP date */
		data other;
			set ID_other;
			by ID flag_drug1 start ;
			if first.flag_drug1 and flag_drug1 ne 1;
			keep ID start;
			rename start = comp;
		run;

/*** Merge all couples ID - COMP */
data want_comp;
	merge ongoing_unique ongoing_multiple other;
	by ID comp;
run;

/*** FINAL MERGE */
data want;
	merge test4 (where=(flag_drug1_intake1=1)) want_comp;
	by id;
	drop flag:;
run;

Output:

first date with label

Re: first date with label

Re: first date with label

Re: first date with label

Re: first date with label

first date with label

Re: first date with label

Re: first date with label

Re: first date with label

Re: first date with label

Click image to register for webinar

Classroom Training Available!