Hi all,
I have data in the form
ID drug_id date_start date_end
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
b ...
I am trying to convert it to something like
ID regimen1 date_start_regimen1 data_stop_regimen1 regimen2 date_start_regimen2 date_stop_regimen2
a 123 1/1/01 1/1/02 124 1/1/02
b ...
Any suggestions?? I'm scratching my head, thank you!
Hi @sometimesqs
Here is an attempt to achieve this job:
The following code considers that missing end dates means that the treatment is ongoing.
if it is not the case, please change the method for imputation -> e.g. imputation of the first non-missing the value ahead.
Hope this help.
data have;
infile datalines truncover;
input ID $ drug_id date_start:mmddyy8. date_end:mmddyy8.;
format date_start date_end:mmddyy8.;
datalines;
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
;
run;
/* Temporary imputation of missing end_date for ongoing treatments */
data have2;
set have;
if date_end = . then date_end = today()+1;
run;
/* Expand dataset: create one row per day for each drug */
data have_exp;
set have2;
format day mmddyy8.;
do day=date_start to date_end;
output;
end;
run;
proc sort data=have_exp;
by id day;
run;
/* Transpose drugs to create combination */
proc transpose data=have_exp out=have_tr (drop=_name_);
var drug_id;
by id day;
run;
data have_regimen;
set have_tr;
regimen = tranwrd(cats(of col:),'.','');
keep id day regimen;
run;
/* Creation of the variable counter, which is incremented each time a new combination is given */
data have_regimen2;
set have_regimen;
by id regimen notsorted;
if first.id then counter=0;
if first.combi then counter + 1;
run;
/* Creation of the final table */
proc sql;
create table have3 as
select id,
regimen,
min(day) as date_start_regimen format=mmddyy8.,
max(day) as date_end_regimen format=mmddyy8.
from have_regimen2
group by id, regimen, counter
order by id, regimen;
quit;
data have4;
set have3;
if date_end_regimen = today()+1 then date_end_regimen=.;
run;
/* Transposition*/
proc transpose data=have4 out=want_1 (drop=_name_) prefix=regimen;
var regimen ;
by id;
run;
proc transpose data=have4 out=want_2 (drop=_name_) prefix=date_start_regimen;
var date_start_regimen ;
by id;
run;
proc transpose data=have4 out=want_3 (drop=_name_) prefix=date_end_regimen;
var date_end_regimen ;
by id;
run;
data want;
merge want_:;
by id;
run;
proc print;
var regimen1 date_start_regimen1 date_end_regimen1
regimen2 date_start_regimen2 date_end_regimen2
regimen3 date_start_regimen3 date_end_regimen3;
id id;
run;
Where does regimen come in?
You're going to need to transpose this through proc transpose, but we need more information and to have all the data listed.
Hi @sometimesqs
Here is an attempt to achieve this job:
The following code considers that missing end dates means that the treatment is ongoing.
if it is not the case, please change the method for imputation -> e.g. imputation of the first non-missing the value ahead.
Hope this help.
data have;
infile datalines truncover;
input ID $ drug_id date_start:mmddyy8. date_end:mmddyy8.;
format date_start date_end:mmddyy8.;
datalines;
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
;
run;
/* Temporary imputation of missing end_date for ongoing treatments */
data have2;
set have;
if date_end = . then date_end = today()+1;
run;
/* Expand dataset: create one row per day for each drug */
data have_exp;
set have2;
format day mmddyy8.;
do day=date_start to date_end;
output;
end;
run;
proc sort data=have_exp;
by id day;
run;
/* Transpose drugs to create combination */
proc transpose data=have_exp out=have_tr (drop=_name_);
var drug_id;
by id day;
run;
data have_regimen;
set have_tr;
regimen = tranwrd(cats(of col:),'.','');
keep id day regimen;
run;
/* Creation of the variable counter, which is incremented each time a new combination is given */
data have_regimen2;
set have_regimen;
by id regimen notsorted;
if first.id then counter=0;
if first.combi then counter + 1;
run;
/* Creation of the final table */
proc sql;
create table have3 as
select id,
regimen,
min(day) as date_start_regimen format=mmddyy8.,
max(day) as date_end_regimen format=mmddyy8.
from have_regimen2
group by id, regimen, counter
order by id, regimen;
quit;
data have4;
set have3;
if date_end_regimen = today()+1 then date_end_regimen=.;
run;
/* Transposition*/
proc transpose data=have4 out=want_1 (drop=_name_) prefix=regimen;
var regimen ;
by id;
run;
proc transpose data=have4 out=want_2 (drop=_name_) prefix=date_start_regimen;
var date_start_regimen ;
by id;
run;
proc transpose data=have4 out=want_3 (drop=_name_) prefix=date_end_regimen;
var date_end_regimen ;
by id;
run;
data want;
merge want_:;
by id;
run;
proc print;
var regimen1 date_start_regimen1 date_end_regimen1
regimen2 date_start_regimen2 date_end_regimen2
regimen3 date_start_regimen3 date_end_regimen3;
id id;
run;
thank you! this is exactly what i was going for
Great !
My pleasure
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.