Hi all,
I have this dataset ;
sasuser.sample1_drugs
Obs PTID DRUG_DT DRUG123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
PT078375333 | 31DEC2014 | ETOPOSIDE |
PT078375333 | 01JAN2015 | CISPLATIN |
PT078375333 | 01JAN2015 | ETOPOSIDE |
PT078375333 | 02JAN2015 | ETOPOSIDE |
PT078375333 | 21JAN2015 | CISPLATIN |
PT078375333 | 21JAN2015 | ETOPOSIDE |
PT078375333 | 22JAN2015 | ETOPOSIDE |
PT078375333 | 23JAN2015 | ETOPOSIDE |
PT078375333 | 11FEB2015 | CISPLATIN |
PT078375333 | 11FEB2015 | ETOPOSIDE |
PT078375333 | 12FEB2015 | ETOPOSIDE |
PT078375333 | 13FEB2015 | ETOPOSIDE |
PT078375333 | 04MAR2015 | CISPLATIN |
PT078375333 | 05MAR2015 | ETOPOSIDE |
PT078375333 | 06MAR2015 | ETOPOSIDE |
PT078837214 | 13NOV2013 | CARBOPLATIN |
PT078837214 | 13NOV2013 | ETOPOSIDE |
PT078837214 | 11DEC2013 | CARBOPLATIN |
PT078837214 | 11DEC2013 | ETOPOSIDE |
PT078837214 | 12DEC2013 | ETOPOSIDE |
PT082388044 | 18MAR2013 | CARBOPLATIN |
PT082388044 | 18MAR2013 | ETOPOSIDE |
PT082388044 | 19MAR2013 | ETOPOSIDE |
PT082388044 | 20MAR2013 | ETOPOSIDE |
PT082388044 | 08APR2013 | CARBOPLATIN |
PT082388044 | 08APR2013 | ETOPOSIDE |
PT082388044 | 09APR2013 | ETOPOSIDE |
PT082388044 | 10APR2013 | ETOPOSIDE |
PT082388044 | 01MAY2013 | CARBOPLATIN |
PT082388044 | 01MAY2013 | ETOPOSIDE |
PT082388044 | 02MAY2013 | ETOPOSIDE |
PT082388044 | 03MAY2013 | ETOPOSIDE |
PT082388044 | 22MAY2013 | CARBOPLATIN |
PT082388044 | 22MAY2013 | ETOPOSIDE |
PT082388044 | 23MAY2013 | ETOPOSIDE |
PT082388044 | 24MAY2013 | ETOPOSIDE |
PT082388044 | 19JAN2015 | CARBOPLATIN |
PT082388044 | 19JAN2015 | ETOPOSIDE |
PT082388044 | 20JAN2015 | ETOPOSIDE |
PT082388044 | 21JAN2015 | ETOPOSIDE |
PT082388044 | 09FEB2015 | CARBOPLATIN |
PT082388044 | 09FEB2015 | ETOPOSIDE |
PT082388044 | 10FEB2015 | ETOPOSIDE |
PT082388044 | 11FEB2015 | ETOPOSIDE |
PT082388044 | 03MAR2015 | CARBOPLATIN |
PT082388044 | 03MAR2015 | ETOPOSIDE |
PT082388044 | 04MAR2015 | ETOPOSIDE |
PT082388044 | 05MAR2015 | ETOPOSIDE |
PT082388044 | 24MAR2015 | CARBOPLATIN |
PT082388044 | 24MAR2015 | ETOPOSIDE |
PT082388044 | 25MAR2015 | ETOPOSIDE |
PT082388044 | 26MAR2015 | ETOPOSIDE |
PT082388044 | 02FEB2016 | CARBOPLATIN |
PT082388044 | 02FEB2016 | ETOPOSIDE |
PT082388044 | 03FEB2016 | ETOPOSIDE |
PT082388044 | 04FEB2016 | ETOPOSIDE |
PT082388044 | 23FEB2016 | CARBOPLATIN |
PT082388044 | 23FEB2016 | ETOPOSIDE |
PT082388044 | 24FEB2016 | ETOPOSIDE |
PT082388044 | 25FEB2016 | ETOPOSIDE |
PT082388044 | 15MAR2016 | CARBOPLATIN |
PT082388044 | 15MAR2016 | ETOPOSIDE |
PT082388044 | 16MAR2016 | ETOPOSIDE |
PT082388044 | 17MAR2016 | ETOPOSIDE |
PT083585377 | 17APR2012 | CARBOPLATIN |
PT083585377 | 17APR2012 | ETOPOSIDE |
PT083585377 | 18APR2012 | ETOPOSIDE |
PT083585377 | 19APR2012 | ETOPOSIDE |
PT083585377 | 09MAY2012 | CARBOPLATIN |
PT083585377 | 09MAY2012 | ETOPOSIDE |
PT083585377 | 10MAY2012 | ETOPOSIDE |
PT083585377 | 11MAY2012 | ETOPOSIDE |
PT083585377 | 30MAY2012 | CARBOPLATIN |
PT083585377 | 30MAY2012 | ETOPOSIDE |
PT083585377 | 31MAY2012 | ETOPOSIDE |
PT083585377 | 01JUN2012 | ETOPOSIDE |
PT083585377 | 20JUN2012 | CARBOPLATIN |
PT083585377 | 20JUN2012 | ETOPOSIDE |
PT083585377 | 21JUN2012 | ETOPOSIDE |
PT083585377 | 22JUN2012 | ETOPOSIDE |
PT083585377 | 25JUL2012 | CARBOPLATIN |
PT083585377 | 25JUL2012 | ETOPOSIDE |
PT083585377 | 26JUL2012 | ETOPOSIDE |
PT083585377 | 27JUL2012 | ETOPOSIDE |
PT083585377 | 15AUG2012 | CARBOPLATIN |
PT083585377 | 15AUG2012 | ETOPOSIDE |
PT083585377 | 16AUG2012 | ETOPOSIDE |
PT083585377 | 17AUG2012 | ETOPOSIDE |
PT083585377 | 03JAN2013 | CARBOPLATIN |
PT083585377 | 03JAN2013 | ETOPOSIDE |
PT083585377 | 22JAN2013 | CARBOPLATIN |
PT083585377 | 22JAN2013 | IRINOTECAN |
PT083585377 | 18FEB2013 | CARBOPLATIN |
PT083585377 | 18FEB2013 | IRINOTECAN |
PT083585377 | 25FEB2013 | CARBOPLATIN |
PT083585377 | 25FEB2013 | IRINOTECAN |
PT083585377 | 04MAR2013 | CARBOPLATIN |
PT083585377 | 04MAR2013 | IRINOTECAN |
PT083585377 | 16APR2013 | CARBOPLATIN |
PT083585377 | 16APR2013 | IRINOTECAN |
PT083585377 | 23APR2013 | CARBOPLATIN |
PT083585377 | 23APR2013 | IRINOTECAN |
PT083585377 | 30APR2013 | CARBOPLATIN |
PT083585377 | 30APR2013 | IRINOTECAN |
PT083585377 | 31MAY2013 | CARBOPLATIN |
PT083585377 | 31MAY2013 | IRINOTECAN |
PT083585377 | 07JUN2013 | CARBOPLATIN |
PT083585377 | 07JUN2013 | IRINOTECAN |
PT083585377 | 24JUN2013 | TOPOTECAN |
PT083585377 | 25JUN2013 | TOPOTECAN |
PT083585377 | 26JUN2013 | TOPOTECAN |
PT083585377 | 27JUN2013 | TOPOTECAN |
PT083585377 | 28JUN2013 | TOPOTECAN |
PT083585377 | 22JUL2013 | CARBOPLATIN |
PT083585377 | 22JUL2013 | IRINOTECAN |
PT083585377 | 22JUL2013 | TOPOTECAN |
PT083585377 | 29JUL2013 | TOPOTECAN |
PT083585377 | 30JUL2013 | TOPOTECAN |
PT083585377 | 31JUL2013 | TOPOTECAN |
PT083585377 | 01AUG2013 | TOPOTECAN |
PT083585377 | 02AUG2013 | TOPOTECAN |
PT083585377 | 26AUG2013 | TOPOTECAN |
PT083585377 | 27AUG2013 | TOPOTECAN |
PT083585377 | 28AUG2013 | TOPOTECAN |
PT083585377 | 29AUG2013 | TOPOTECAN |
PT083585377 | 16SEP2013 | DOCETAXEL |
PT083585377 | 08OCT2013 | DOCETAXEL |
PT083585377 | 11NOV2013 | GEMCITABINE |
PT083585377 | 18NOV2013 | GEMCITABINE |
PT083585377 | 09DEC2013 | GEMCITABINE |
PT083585377 | 23DEC2013 | GEMCITABINE |
PT083585377 | 30DEC2013 | GEMCITABINE |
PT083585377 | 06JAN2014 | GEMCITABINE |
I have made this intermediate step ;
sasuser.sample1_step1
Obs PTID DRUG STDT ENDDT12345678910111213141516
PT078375333 | Etoposide | 31DEC2014 | 06MAR2015 |
PT078375333 | Cisplatin | 01JAN2015 | 04MAR2015 |
PT078837214 | Carboplatin | 13NOV2013 | 11DEC2013 |
PT078837214 | Etoposide | 13NOV2013 | 12DEC2013 |
PT082388044 | Carboplatin | 18MAR2013 | 22MAY2013 |
PT082388044 | Etoposide | 18MAR2013 | 24MAY2013 |
PT082388044 | Carboplatin | 19JAN2015 | 24MAR2015 |
PT082388044 | Etoposide | 19JAN2015 | 26MAR2015 |
PT083585377 | Carboplatin | 17APR2012 | 15AUG2012 |
PT083585377 | Etoposide | 17APR2012 | 17AUG2012 |
PT083585377 | Etoposide | 03JAN2013 | 03JAN2013 |
PT083585377 | Carboplatin | 03JAN2013 | 22JUL2013 |
PT083585377 | Irinotecan | 22JAN2013 | 22JUL2013 |
PT083585377 | Topotecan | 24JUN2013 | 29AUG2013 |
PT083585377 | Docetaxel | 16SEP2013 | 08OCT2013 |
PT083585377 | Gemcitabine | 11NOV2013 | 06JAN2014 |
The code for the intermediate step is:
proc sort data=sasuser.sample1_drugs out=sample1_sorted; by ptid drug ; run ; data step1_1 ; set sample1_sorted ; by ptid drug ; if first.drug then gap=0; format lag_date date9. ; lag_date= lag(drug_dt); if drug_dt-lag_date >=60 /*and first.ptid=0*/ then gap + 1; run ; proc sql; create table step1_final as select ptid, drug, min(drug_dt) as stdt format=date9. ,max(drug_dt) as enddt format=date9. from step1_1 group by ptid, drug, gap ; quit ;
I want to create this final dataset ;
sasuser.sample1_step2
Obs PTID REGIMEN SDATE EDATE12345678910111213141516171819202122
PT078375333 | Etoposide | 31DEC2014 | 31DEC2014 |
PT078375333 | Cisplatin+Etoposide | 01JAN2015 | 04MAR2015 |
PT078375333 | Etoposide | 05MAR2015 | 06MAR2015 |
PT078837214 | Carboplatin+Etoposide | 13NOV2013 | 11DEC2013 |
PT078837214 | Etoposide | 12DEC2013 | 12DEC2013 |
PT082388044 | Carboplatin+Etoposide | 18MAR2013 | 22MAY2013 |
PT082388044 | Etoposide | 23MAY2013 | 24MAY2013 |
PT082388044 | Blank | 25MAY2013 | 18JAN2015 |
PT082388044 | Carboplatin+Etoposide | 19JAN2015 | 24MAR2015 |
PT082388044 | Etoposide | 25MAR2015 | 26MAR2015 |
PT083585377 | Carboplatin+Etoposide | 17APR2012 | 15AUG2012 |
PT083585377 | Etoposide | 16AUG2012 | 17AUG2012 |
PT083585377 | Blank | 18AUG2012 | 02JAN2013 |
PT083585377 | Carboplatin+Etoposide | 03JAN2013 | 03JAN2013 |
PT083585377 | Carboplatin | 04JAN2013 | 21JAN2013 |
PT083585377 | Carboplatin+Irinotecan | 22JAN2013 | 23JUN2013 |
PT083585377 | Carboplatin+Irinotecan+Topotecan | 24JUN2013 | 22JUL2013 |
PT083585377 | Topotecan | 23JUL2013 | 29AUG2013 |
PT083585377 | Blank | 30AUG2013 | 15SEP2013 |
PT083585377 | Docetaxel | 16SEP2013 | 08OCT2013 |
PT083585377 | Blank | 09OCT2013 | 10NOV2013 |
PT083585377 | Gemcitabine | 11NOV2013 | 06JAN2014 |
How to code for this ?
Here is a "brute force" method, which simply generates one obs per date per regimen, and then sums that up:
data all_days;
set step1_final;
do date=stdt to enddt;
output;
end;
drop stdt enddt;
run;
proc sort;
by ptid date;
run;
data all_days2;
do until(last.date);
set all_days;
by ptid date;
length regimen $200;
call catx('+',regimen,drug);
end;
run;
data all_days3;
set all_days2;
by ptid date;
diff=dif(date);
output;
if not first.ptid and diff>1 then do;
regimen='BLANK';
do date=date-diff+1 to date-1;
output;
end;
end;
run;
proc sort;
by ptid date;
run;
data want;
do until(last.regimen);
set all_days3;
by ptid regimen notsorted;
if first.regimen then
stdt=date;
end;
enddt=date;
format stdt enddt date9.;
keep ptid regimen stdt enddt;
run;
I realized that my first suggestion might create to many "BLANK" records if one patient was followed by another whose regimen started after the first patient's regimen ended. So I edited my response a bit to account for that (the datastep ALL_DAYS2 is split into two steps).
Please explain the logic that enables you to get the final data set from the original data set.
Step 1: Find the treatment periods of each individual drug for each patient. Use the drug encounter data to create periods of continuous treatment for each individual chemotherapy. A period of continuous chemo administration starts on the first date of use and ends when there is a gap of greater than 60 days between 2 consecutive chemotherapy (of the same drug) administrations.
Step 2: Line up treatments and create the patient's treatment journey from start of first treatment to the end of the last treatment. Identify treatments that are being given concurrently. If there are no treatments given during a certain period, that period will be labeled as a blank regimen.
Here is a "brute force" method, which simply generates one obs per date per regimen, and then sums that up:
data all_days;
set step1_final;
do date=stdt to enddt;
output;
end;
drop stdt enddt;
run;
proc sort;
by ptid date;
run;
data all_days2;
do until(last.date);
set all_days;
by ptid date;
length regimen $200;
call catx('+',regimen,drug);
end;
run;
data all_days3;
set all_days2;
by ptid date;
diff=dif(date);
output;
if not first.ptid and diff>1 then do;
regimen='BLANK';
do date=date-diff+1 to date-1;
output;
end;
end;
run;
proc sort;
by ptid date;
run;
data want;
do until(last.regimen);
set all_days3;
by ptid regimen notsorted;
if first.regimen then
stdt=date;
end;
enddt=date;
format stdt enddt date9.;
keep ptid regimen stdt enddt;
run;
I realized that my first suggestion might create to many "BLANK" records if one patient was followed by another whose regimen started after the first patient's regimen ended. So I edited my response a bit to account for that (the datastep ALL_DAYS2 is split into two steps).
Hi @saipriya92 ,
You can try this. Hope this help!
/* Expand dataset: create one row per day for each drug */
data sample1_step1_exp;
set sample1_step1;
format DAY date9.;
do DAY=STDT to ENDDT;
output;
end;
run;
proc sort data=sample1_step1_exp;
by PTID DAY;
run;
/* Transpose drugs to create combination regimens */
proc transpose data=sample1_step1_exp out=sample1_step1_tr (drop=_name_);
var DRUG;
by PTID DAY;
run;
data sample1_step1_regimen1;
set sample1_step1_tr;
REGIMEN = catx(" + ",of col:);
keep PTID DAY REGIMEN;
run;
/* Creation of the variable COUNTER, which is incremented each time a new regimen is given */
data sample1_step1_regimen2;
set sample1_step1_regimen1;
by PTID REGIMEN notsorted;
if first.PTID then COUNTER=0;
if first.REGIMEN then COUNTER + 1;
run;
/* Creation of the final table without "blank" periods of time */
proc sql;
create table sample1_step1_regimen3 as
select PTID,
min(DAY) as STDT format=date9.,
max(DAY) as ENDDT format=date9.,
REGIMEN
from sample1_step1_regimen2 group by PTID, REGIMEN, COUNTER order by PTID, STDT ;
quit;
/* Identification of "blank" periods of time */
data sample1_step1_blank (keep= PTID REGIMEN _STDT_blank _ENDDT_blank );
set sample1_step1_regimen3;
format _STDT_blank _ENDDT_blank date9.;
by PTID;
_STDT_blank = lag(ENDDT) +1 ;
if first.PTID or _STDT_blank = STDT then _STDT_blank=.;
if _STDT_blank ne . then do;
_ENDDT_blank = STDT - 1;
REGIMEN = "Blank";
output;
end;
run;
/* Creation of the final table (retrieve "blank" periods of time) */
data sample1_step2;
set sample1_step1_regimen3 sample1_step1_blank (rename=(_STDT_blank= STDT _ENDDT_blank=ENDDT));
run;
proc sort data=sample1_step2;
by PTID STDT;
run;
Output:
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.