Hi guys,
suppose to have the following table:
data DB;
input ID :$20. Admission :date09. Discharge :date09. Age_class Age_end Index Value Total;
format Admission date9. Discharge date9.;
cards;
0001 01JUL2014 16AUG2014 1 4 1 2.3 11.9
0001 13MAY2018 22JUN2018 3 4 0 1.4 .
0001 23JAN2019 25JAN2019 4 4 0 3.2 .
0002 13MAY2016 22SEP2016 1 5 1 2 7.9
0002 09JUL2023 10JUL2023 2 5 0 0.3 .
0002 12SEP2024 15SEP2024 3 5 0 0.2 .
0003 01JUL2014 18AUG2014 1 3 1 12 17.3
0003 07DEC2023 16DEC2023 2 3 0 0.3 .
0004 12JAN2014 15JAN2014 1 2 1 2 2.1
0004 30MAY2019 13JUL2019 2 2 0 0.1 .
0005 30JUN2019 13OCT2019 5 5 0 4.1 .
;
run;
Is there a way to get the following?
data DB1;
input ID :$20. Admission :date09. Discharge :date09. Age_class Age_end Index Value Total Age_class1 Age_class2 Age_class3 Age_class4 Age_class5;
format Admission date9. Discharge date9.;
cards;
0001 01JUL2014 16AUG2014 1 4 1 2.3 11.9 2.3 5 1.4 3.2 .
0001 13MAY2018 22JUN2018 3 4 0 1.4 . . . . . .
0001 23JAN2019 25JAN2019 4 4 0 3.2 . . . . . .
0002 13MAY2016 22SEP2016 1 5 1 2 7.9 2 0.3 0.2 5 0.4
0002 09JUL2023 10JUL2023 2 5 0 0.3 . . . . . .
0002 12SEP2024 15SEP2024 3 5 0 0.2 . . . . . .
0003 01JUL2014 18AUG2014 1 3 1 12 17.3 12 0.3 5 . .
0003 07DEC2023 16DEC2023 2 3 0 0.3 . . . . . .
0004 12JAN2014 15JAN2014 1 2 1 2 2.1 2 0.1 . . .
0004 30MAY2019 13JUL2019 2 2 0 0.1 . . . . . .
0005 30JUN2019 13OCT2019 5 5 1 4.1 4.1 . . . . 4.1
;
run;
In other words, for each ID in DB there is the Age_class at each admission and an Age_class at exit (Age_end) from the study. Then, there is the "Value" variable that is the time the patient stay in the corresponding Age_class (before exit). There is also a "Total" time in the study. What I need is to transform the dataset DB into a wide format where there are columns corresponding to the age classes filled by the corresponding Value. Note that in this new dataset the row-wise total must be reached and must be equal to "Total" and so: if there is a jump in the age classes (Age_class variable) a value = 5 years must be added because the age-class interval is of max 5 years.
Finally, in the new wide dataset (DB1) rows must be filled only where Index = 1
Can anyone help me please?
Thank you very much in advance
Does below return what you're after?
data have;
input ID :$20. Admission :date09. Discharge :date09. Age_class Age_end Index Value Total Age_class1 Age_class2 Age_class3 Age_class4 Age_class5;
format Admission date9. Discharge date9.;
cards;
0001 01JUL2014 16AUG2014 1 4 1 2.3 11.9 2.3 1.4 5 3.2 .
0001 13MAY2018 22JUN2018 3 4 0 1.4 . . . . . .
0001 23JAN2019 25JAN2019 4 4 0 3.2 . . . . . .
0002 13MAY2016 22SEP2016 1 5 1 2 7.9 2 0.3 0.2 5 0.4
0002 09JUL2023 10JUL2023 2 5 0 0.3 . . . . . .
0002 12SEP2024 15SEP2024 3 5 0 0.2 . . . . . .
0003 01JUL2014 18AUG2014 1 3 1 12 17.3 12 0.3 5 . .
0003 07DEC2023 16DEC2023 2 3 0 0.3 . . . . . .
0004 12JAN2014 15JAN2014 1 2 1 2 2.1 2 0.1 . . .
0004 30MAY2019 13JUL2019 2 2 0 0.1 . . . . . .
0005 30JUN2019 13OCT2019 5 5 1 4.1 4.1 . . . . 4.1
0006 30JUN2019 13OCT2019 5 5 1 0 0 . . . . .
;
run;
data want(drop=_:);
if _n_=1 then
do;
_val=0;
dcl hash h1(dataset:'have(rename=(value=_val))', ordered:'y');
h1.defineKey('id','age_class');
h1.defineData('_val');
h1.defineDone();
end;
set have;
array age_class_derived{5} 8;
if index=1 then
do;
/* distribute existing values */
do _i=1 to dim(age_class_derived);
if h1.find(key:id,key:_i)=0 then age_class_derived[_i]=_val;
end;
/* fill-up with max 5 until total reached */
_total=round(sum(of age_class_derived[*]),.0000001);
do _i=1 to dim(age_class_derived) while( _total<total );
if missing(age_class_derived[_i]) then
do;
age_class_derived[_i]=min(5,abs(sum(total,-_total)));
end;
_total=round(sum(of age_class_derived[*]),.0000001);
end;
end;
run;
proc print data=want;
run;
I've made the assumption that the marked cells in your sample data were typos.
Does below return what you're after?
data have;
input ID :$20. Admission :date09. Discharge :date09. Age_class Age_end Index Value Total Age_class1 Age_class2 Age_class3 Age_class4 Age_class5;
format Admission date9. Discharge date9.;
cards;
0001 01JUL2014 16AUG2014 1 4 1 2.3 11.9 2.3 1.4 5 3.2 .
0001 13MAY2018 22JUN2018 3 4 0 1.4 . . . . . .
0001 23JAN2019 25JAN2019 4 4 0 3.2 . . . . . .
0002 13MAY2016 22SEP2016 1 5 1 2 7.9 2 0.3 0.2 5 0.4
0002 09JUL2023 10JUL2023 2 5 0 0.3 . . . . . .
0002 12SEP2024 15SEP2024 3 5 0 0.2 . . . . . .
0003 01JUL2014 18AUG2014 1 3 1 12 17.3 12 0.3 5 . .
0003 07DEC2023 16DEC2023 2 3 0 0.3 . . . . . .
0004 12JAN2014 15JAN2014 1 2 1 2 2.1 2 0.1 . . .
0004 30MAY2019 13JUL2019 2 2 0 0.1 . . . . . .
0005 30JUN2019 13OCT2019 5 5 1 4.1 4.1 . . . . 4.1
0006 30JUN2019 13OCT2019 5 5 1 0 0 . . . . .
;
run;
data want(drop=_:);
if _n_=1 then
do;
_val=0;
dcl hash h1(dataset:'have(rename=(value=_val))', ordered:'y');
h1.defineKey('id','age_class');
h1.defineData('_val');
h1.defineDone();
end;
set have;
array age_class_derived{5} 8;
if index=1 then
do;
/* distribute existing values */
do _i=1 to dim(age_class_derived);
if h1.find(key:id,key:_i)=0 then age_class_derived[_i]=_val;
end;
/* fill-up with max 5 until total reached */
_total=round(sum(of age_class_derived[*]),.0000001);
do _i=1 to dim(age_class_derived) while( _total<total );
if missing(age_class_derived[_i]) then
do;
age_class_derived[_i]=min(5,abs(sum(total,-_total)));
end;
_total=round(sum(of age_class_derived[*]),.0000001);
end;
end;
run;
proc print data=want;
run;
I've made the assumption that the marked cells in your sample data were typos.
Really really sorry: for ID 0001 age class 3 = 1.4 while age_class = 2 is 5. I will edit my question soon.
The desired result does not make much sense.
Either make a wide dataset with one observation per ID.
So perhaps something like this?
Or leave it in the tall format.
Agreeing with @Tom , I don't see the point of doing this transformation from long to wide. What is the benefit here of creating a wide data set as you stated above? What can you do with such a wide data set that you can't do with the long data set?
As things stand so far in this thread, this is the XY problem, where it seems like you are tightly focused on one specific programming task, and possibly you have chosen to go down a path this is inefficient and hard to program; and if we knew where you wanted to go, we could help figure out a better path that is easier to program.
@PaigeMiller wrote:
Agreeing with @Tom , I don't see the point of doing this transformation from long to wide. What is the benefit here of creating a wide data set as you stated above? What can you do with such a wide data set that you can't do with the long data set?
As things stand so far in this thread, this is the XY problem, where it seems like you are tightly focused on one specific programming task, and possibly you have chosen to go down a path this is inefficient and hard to program; and if we knew where you wanted to go, we could help figure out a better path that is easier to program.
From several of the other posts by the OP I would say this set might be related to interpolating between the "wide" values prior to transposing back to a long form.
I would say that OP posts have all the symptoms of an XY problem with never describing actual starting data and where to go but keeps asking how to do one task and often poorly described at that.
Just for having some fun.
data DB;
input ID :$20. Admission :date09. Discharge :date09. Age_class Age_end Index Value Total;
format Admission date9. Discharge date9.;
cards;
0001 01JUL2014 16AUG2014 1 4 1 2.3 11.9
0001 13MAY2018 22JUN2018 3 4 0 1.4 .
0001 23JAN2019 25JAN2019 4 4 0 3.2 .
0002 13MAY2016 22SEP2016 1 5 1 2 7.9
0002 09JUL2023 10JUL2023 2 5 0 0.3 .
0002 12SEP2024 15SEP2024 3 5 0 0.2 .
0003 01JUL2014 18AUG2014 1 3 1 12 17.3
0003 07DEC2023 16DEC2023 2 3 0 0.3 .
0004 12JAN2014 15JAN2014 1 2 1 2 2.1
0004 30MAY2019 13JUL2019 2 2 0 0.1 .
0005 30JUN2019 13OCT2019 5 5 0 4.1 .
;
run;
proc sql noprint;
create table summary as
select ID,sum(Total)-sum(Value) as balance
from DB
group by ID;
select max(Age_class) into :n trimmed from DB;
quit;
data token;
set summary;
temp=balance;n=0;
if balance<5 then do;n=1;output;end;
else do;
do until(balance<5);
n+1;balance=5;output;
balance=temp-5;
temp=temp-5;
end;
n+1;output;
end;
drop temp;
run;
proc transpose data=DB out=transpose(drop=_name_) prefix=Age_class;
by id ;
id Age_class;
var value;
run;
data want;
if _n_=1 then do;
if 0 then set DB;
retain Age_class1-Age_class&n.; *Change the order of variables;
if 0 then set token;
declare hash h(dataset:'token');
h.definekey('id','n');
h.definedata('balance');
h.definedone();
end;
merge DB transpose;
by id;
array x{*} Age_class1-Age_class&n.;
n=0;
if first.id then do;
do i=1 to dim(x);
if missing(x{i}) then do;
n+1;call missing(balance);rc=h.find();x{i}=balance;rc=h.remove();
end;
end;
end;
drop rc n i balance;
output;
call missing(of _all_);
run;
Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9.
Early bird rate extended! Save $200 when you sign up by March 31.
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.