data test;
input id$2. start drug;
attrib start format =date9. informat=date9.;
datalines;
1 01JAN2015 1
1 18FEB2015 2
2 01jan2016 2
2 01apr2016 1
3 01JAN2015 1
3 01JAN2015 2
4 01jan2017 2
4 01apr2017 2
;
run;
I am trying to find the first of each drug per id. Retain the first as "first" and indicate the drug name under "drug". And return the start of the other drug as "comp". If both drugs started the same day then output "N".
output
id first comp drug
1 01JAN2015 18FEB2015 1
2 01JAN2016 01apr2016 2
3 01JAN2015 01JAN2015 N
4 01jan2017 . 2
Here is an attempt to achieve this. Please review the way to compute COMP date.
From your explanations, I understand that there are 3 cases:
Best,
data test;
input id$2. start drug;
attrib start format=date9. informat=date9.;
datalines;
1 01JAN2015 1
1 18FEB2015 2
2 01jan2016 2
2 01apr2016 1
3 01JAN2015 1
3 01JAN2015 2
4 01jan2017 2
4 01apr2017 2
;
run;
proc sort data=test out=test_sorted;
by id start;
run;
/*** If both initial drugs started the same day then output "N" */
data test2;
set test_sorted;
by id start;
if first.start then drugc = put(drug, 1.);
else drugc = "N";
drop drug;
rename drugc=drug;
if last.start then output;
run;
/*** Flag first drug and first drug intake */
data test3;
set test2;
by id start;
if first.id then flag_drug1_intake1=1;
else flag_drug1_intake1=0;
run;
proc sql;
create table test4 as
select a.id, a.start, a.drug, case when b.flag_drug1_intake1 then 1 else 0 end as flag_drug1, a.flag_drug1_intake1
from test3 as a
left join
(select * from test3 where flag_drug1_intake1=1) as b
on a.id = b.id and a.drug = b.drug
order by a.id, a.start;
quit;
/*** Identify COMP date */
/*** CASE1: ID with ongoing first 'unique' drug
COMP = .*/
proc sql;
create table ongoing_unique as
select distinct id, . as comp
from test4
group by id
having count(distinct flag_drug1)=1 and drug ne "N";
quit;
/*** CASE2: ID with ongoing first 'multiples' drug
COMP = start */
proc sql;
create table ongoing_multiple as
select distinct id, start as comp
from test4
group by id
having count(distinct flag_drug1)=1 and drug="N";
quit;
/*** CASE3: ID with other drug(s) following the initial one
COMP = first intake date of the next drug */
proc sql;
create table ID_other as
select *
from test4
group by id
having count(distinct flag_drug1)>1
order by ID, flag_drug1, start;
quit;
/* Retrieve COMP date */
data other;
set ID_other;
by ID flag_drug1 start ;
if first.flag_drug1 and flag_drug1 ne 1;
keep ID start;
rename start = comp;
run;
/*** Merge all couples ID - COMP */
data want_comp;
merge ongoing_unique ongoing_multiple other;
by ID comp;
run;
/*** FINAL MERGE */
data want;
merge test4 (where=(flag_drug1_intake1=1)) want_comp;
by id;
drop flag:;
run;
Output:
If there are always two records for each ID then PROC SUMMARY ... wait, I don't want to assume ... are there always going to be 2 records for each ID?
Why is there no COMP for ID = 4?
Here is an attempt to achieve this. Please review the way to compute COMP date.
From your explanations, I understand that there are 3 cases:
Best,
data test;
input id$2. start drug;
attrib start format=date9. informat=date9.;
datalines;
1 01JAN2015 1
1 18FEB2015 2
2 01jan2016 2
2 01apr2016 1
3 01JAN2015 1
3 01JAN2015 2
4 01jan2017 2
4 01apr2017 2
;
run;
proc sort data=test out=test_sorted;
by id start;
run;
/*** If both initial drugs started the same day then output "N" */
data test2;
set test_sorted;
by id start;
if first.start then drugc = put(drug, 1.);
else drugc = "N";
drop drug;
rename drugc=drug;
if last.start then output;
run;
/*** Flag first drug and first drug intake */
data test3;
set test2;
by id start;
if first.id then flag_drug1_intake1=1;
else flag_drug1_intake1=0;
run;
proc sql;
create table test4 as
select a.id, a.start, a.drug, case when b.flag_drug1_intake1 then 1 else 0 end as flag_drug1, a.flag_drug1_intake1
from test3 as a
left join
(select * from test3 where flag_drug1_intake1=1) as b
on a.id = b.id and a.drug = b.drug
order by a.id, a.start;
quit;
/*** Identify COMP date */
/*** CASE1: ID with ongoing first 'unique' drug
COMP = .*/
proc sql;
create table ongoing_unique as
select distinct id, . as comp
from test4
group by id
having count(distinct flag_drug1)=1 and drug ne "N";
quit;
/*** CASE2: ID with ongoing first 'multiples' drug
COMP = start */
proc sql;
create table ongoing_multiple as
select distinct id, start as comp
from test4
group by id
having count(distinct flag_drug1)=1 and drug="N";
quit;
/*** CASE3: ID with other drug(s) following the initial one
COMP = first intake date of the next drug */
proc sql;
create table ID_other as
select *
from test4
group by id
having count(distinct flag_drug1)>1
order by ID, flag_drug1, start;
quit;
/* Retrieve COMP date */
data other;
set ID_other;
by ID flag_drug1 start ;
if first.flag_drug1 and flag_drug1 ne 1;
keep ID start;
rename start = comp;
run;
/*** Merge all couples ID - COMP */
data want_comp;
merge ongoing_unique ongoing_multiple other;
by ID comp;
run;
/*** FINAL MERGE */
data want;
merge test4 (where=(flag_drug1_intake1=1)) want_comp;
by id;
drop flag:;
run;
Output:
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.