Hi all,
I have data in the form
ID drug_id date_start date_end
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
b ...
I am trying to convert it to something like
ID regimen1 date_start_regimen1 data_stop_regimen1 regimen2 date_start_regimen2 date_stop_regimen2
a 123 1/1/01 1/1/02 124 1/1/02
b ...
Any suggestions?? I'm scratching my head, thank you!
Hi @sometimesqs
Here is an attempt to achieve this job:
The following code considers that missing end dates means that the treatment is ongoing.
if it is not the case, please change the method for imputation -> e.g. imputation of the first non-missing the value ahead.
Hope this help.
data have;
infile datalines truncover;
input ID $ drug_id date_start:mmddyy8. date_end:mmddyy8.;
format date_start date_end:mmddyy8.;
datalines;
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
;
run;
/* Temporary imputation of missing end_date for ongoing treatments */
data have2;
set have;
if date_end = . then date_end = today()+1;
run;
/* Expand dataset: create one row per day for each drug */
data have_exp;
set have2;
format day mmddyy8.;
do day=date_start to date_end;
output;
end;
run;
proc sort data=have_exp;
by id day;
run;
/* Transpose drugs to create combination */
proc transpose data=have_exp out=have_tr (drop=_name_);
var drug_id;
by id day;
run;
data have_regimen;
set have_tr;
regimen = tranwrd(cats(of col:),'.','');
keep id day regimen;
run;
/* Creation of the variable counter, which is incremented each time a new combination is given */
data have_regimen2;
set have_regimen;
by id regimen notsorted;
if first.id then counter=0;
if first.combi then counter + 1;
run;
/* Creation of the final table */
proc sql;
create table have3 as
select id,
regimen,
min(day) as date_start_regimen format=mmddyy8.,
max(day) as date_end_regimen format=mmddyy8.
from have_regimen2
group by id, regimen, counter
order by id, regimen;
quit;
data have4;
set have3;
if date_end_regimen = today()+1 then date_end_regimen=.;
run;
/* Transposition*/
proc transpose data=have4 out=want_1 (drop=_name_) prefix=regimen;
var regimen ;
by id;
run;
proc transpose data=have4 out=want_2 (drop=_name_) prefix=date_start_regimen;
var date_start_regimen ;
by id;
run;
proc transpose data=have4 out=want_3 (drop=_name_) prefix=date_end_regimen;
var date_end_regimen ;
by id;
run;
data want;
merge want_:;
by id;
run;
proc print;
var regimen1 date_start_regimen1 date_end_regimen1
regimen2 date_start_regimen2 date_end_regimen2
regimen3 date_start_regimen3 date_end_regimen3;
id id;
run;
Where does regimen come in?
You're going to need to transpose this through proc transpose, but we need more information and to have all the data listed.
Hi @sometimesqs
Here is an attempt to achieve this job:
The following code considers that missing end dates means that the treatment is ongoing.
if it is not the case, please change the method for imputation -> e.g. imputation of the first non-missing the value ahead.
Hope this help.
data have;
infile datalines truncover;
input ID $ drug_id date_start:mmddyy8. date_end:mmddyy8.;
format date_start date_end:mmddyy8.;
datalines;
a 1 1/1/01
a 2 1/1/01
a 3 1/1/01 1/1/02
a 4 1/1/02
;
run;
/* Temporary imputation of missing end_date for ongoing treatments */
data have2;
set have;
if date_end = . then date_end = today()+1;
run;
/* Expand dataset: create one row per day for each drug */
data have_exp;
set have2;
format day mmddyy8.;
do day=date_start to date_end;
output;
end;
run;
proc sort data=have_exp;
by id day;
run;
/* Transpose drugs to create combination */
proc transpose data=have_exp out=have_tr (drop=_name_);
var drug_id;
by id day;
run;
data have_regimen;
set have_tr;
regimen = tranwrd(cats(of col:),'.','');
keep id day regimen;
run;
/* Creation of the variable counter, which is incremented each time a new combination is given */
data have_regimen2;
set have_regimen;
by id regimen notsorted;
if first.id then counter=0;
if first.combi then counter + 1;
run;
/* Creation of the final table */
proc sql;
create table have3 as
select id,
regimen,
min(day) as date_start_regimen format=mmddyy8.,
max(day) as date_end_regimen format=mmddyy8.
from have_regimen2
group by id, regimen, counter
order by id, regimen;
quit;
data have4;
set have3;
if date_end_regimen = today()+1 then date_end_regimen=.;
run;
/* Transposition*/
proc transpose data=have4 out=want_1 (drop=_name_) prefix=regimen;
var regimen ;
by id;
run;
proc transpose data=have4 out=want_2 (drop=_name_) prefix=date_start_regimen;
var date_start_regimen ;
by id;
run;
proc transpose data=have4 out=want_3 (drop=_name_) prefix=date_end_regimen;
var date_end_regimen ;
by id;
run;
data want;
merge want_:;
by id;
run;
proc print;
var regimen1 date_start_regimen1 date_end_regimen1
regimen2 date_start_regimen2 date_end_regimen2
regimen3 date_start_regimen3 date_end_regimen3;
id id;
run;
thank you! this is exactly what i was going for
Great !
My pleasure
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.