Hi all,
I have some code which I think could be improved through combining multiple DATA steps into one.
Currently I generate multiple tables where I keep specific values (the actual dataset has a lot more variables) where a specific criteria is met. After I then join them all together with the PROC SQL function based on a unique identifier so they can then be joined with other tables.
Below I've attached some sample data of how it currently operates. Any help would be appreciated.
Many thanks,
Sandy
data work.have;
input UID :3. CRITERIA :$8. VARIABLE1 :$9. VARIABLE2 :$9. VARIABLE3 :$9. VARIABLE4 :$9. VARIABLE5 :$9. VARIABLE6 :$9. VARIABLE7 :$9. VARIABLE8 :$9.;
infile datalines dlm=',';
datalines;
1,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
1,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
1,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
1,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
1,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
2,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
2,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
2,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
2,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
2,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
3,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
3,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
3,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
3,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
3,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
4,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
4,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
4,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
4,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
4,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
5,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
5,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
5,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
5,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
5,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
;
run;
data work.data1;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'A';
run;
data work.data2;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'B';
run;
data work.data3;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'C';
run;
data work.data4;
set work.have;
keep UID VARIABLE5 VARIABLE2 VARIABLE3;
where CRITERIA = 'D';
run;
proc sql;
create table work.want as
select t1.UID,
t1.VARIABLE1 AS A_VAR1,
t1.VARIABLE2 AS A_VAR2,
t1.VARIABLE3 AS A_VAR3,
t1.VARIABLE4 AS A_VAR4,
t2.VARIABLE1 AS B_VAR1,
t2.VARIABLE2 AS B_VAR2,
t2.VARIABLE3 AS B_VAR3,
t2.VARIABLE4 AS B_VAR4,
t3.VARIABLE1 AS C_VAR1,
t3.VARIABLE2 AS C_VAR2,
t3.VARIABLE3 AS C_VAR3,
t3.VARIABLE4 AS C_VAR4,
t4.VARIABLE5 AS D_VAR5,
t4.VARIABLE2 AS D_VAR2,
t4.VARIABLE3 AS D_VAR3
from work.data1 t1
left join work.data2 t2 on (t2.UID=t1.UID)
left join work.data3 t3 on (t3.UID=t1.UID)
left join work.data4 t4 on (t4.UID=t1.UID);
quit;
proc sort data=work.have(keep=UID CRITERIA VARIABLE1-VARIABLE5) out=result;
by UID;
where CRITERIA in ('A','B','C','D');
run;
data result;
set result;
by UID;
retain
A_VAR1-A_VAR4
B_VAR1-B_VAR4
C_VAR1-C_VAR4
D_VAR5 D_VAR2 D_VAR3
;
array FromVar{*} VARIABLE1-VARIABLE5;
array ToVar_A{*} $9 A_VAR1-A_VAR4;
array ToVar_B{*} $9 B_VAR1-B_VAR4;
array ToVar_C{*} $9 C_VAR1-C_VAR4;
array ToVar_D{*} $9 D_VAR5 D_VAR2 D_VAR3;
retain FLAG;
if first.UID then do;
call missing(of ToVar_A{*});
call missing(of ToVar_B{*});
call missing(of ToVar_C{*});
call missing(of ToVar_D{*});
FLAG=0;
end;
select;
when(CRITERIA='A') do;
do i=1 to dim(ToVar_A);ToVar_A[i]=FromVar[i];end;
FLAG=1;
end;
when(CRITERIA='B') do;
do i=1 to dim(ToVar_B);ToVar_B[i]=FromVar[i];end;
end;
when(CRITERIA='C') do;
do i=1 to dim(ToVar_C);ToVar_C[i]=FromVar[i];end;
end;
when(CRITERIA='D') do;
ToVar_D[1]=FromVar[5];
do i=2 to dim(ToVar_D);ToVar_D[i]=FromVar[i];end;
end;
otherwise;
end;
if last.UID and FLAG;
drop i FLAG CRITERIA VARIABLE1-VARIABLE5;
run;
proc sort data=work.have(keep=UID CRITERIA VARIABLE1-VARIABLE5) out=result;
by UID;
where CRITERIA in ('A','B','C','D');
run;
data result;
set result;
by UID;
retain
A_VAR1-A_VAR4
B_VAR1-B_VAR4
C_VAR1-C_VAR4
D_VAR5 D_VAR2 D_VAR3
;
array FromVar{*} VARIABLE1-VARIABLE5;
array ToVar_A{*} $9 A_VAR1-A_VAR4;
array ToVar_B{*} $9 B_VAR1-B_VAR4;
array ToVar_C{*} $9 C_VAR1-C_VAR4;
array ToVar_D{*} $9 D_VAR5 D_VAR2 D_VAR3;
retain FLAG;
if first.UID then do;
call missing(of ToVar_A{*});
call missing(of ToVar_B{*});
call missing(of ToVar_C{*});
call missing(of ToVar_D{*});
FLAG=0;
end;
select;
when(CRITERIA='A') do;
do i=1 to dim(ToVar_A);ToVar_A[i]=FromVar[i];end;
FLAG=1;
end;
when(CRITERIA='B') do;
do i=1 to dim(ToVar_B);ToVar_B[i]=FromVar[i];end;
end;
when(CRITERIA='C') do;
do i=1 to dim(ToVar_C);ToVar_C[i]=FromVar[i];end;
end;
when(CRITERIA='D') do;
ToVar_D[1]=FromVar[5];
do i=2 to dim(ToVar_D);ToVar_D[i]=FromVar[i];end;
end;
otherwise;
end;
if last.UID and FLAG;
drop i FLAG CRITERIA VARIABLE1-VARIABLE5;
run;
This revised version looks like a big improvement. Did you have a question about it?
@learsaas wrote:
https://communities.sas.com/t5/SAS-Programming/Row-to-variable/td-p/522615
Yup, still works as designed.
Essentially what @Sanflo is doing is a transpose task. But their solution is hard coded for the values so doesn't generalize still.
The solution posted, with the appropriate changes for the new data, still works.
Your are sorted by UID, so you can merge subsets of variables (renamed) by id, as in:
data work.have;
input UID :3. CRITERIA :$8. VARIABLE1 :$9. VARIABLE2 :$9. VARIABLE3 :$9. VARIABLE4 :$9. VARIABLE5 :$9. VARIABLE6 :$9. VARIABLE7 :$9. VARIABLE8 :$9.;
infile datalines dlm=',';
datalines;
1,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
1,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
1,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
1,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
1,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
2,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
2,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
2,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
2,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
2,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
3,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
3,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
3,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
3,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
3,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
4,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
4,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
4,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
4,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
4,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
5,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
5,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
5,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
5,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
5,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
;
run;
data want (drop=criteria);
merge have (where=(criteria='A')
rename=(variable1=A_variable1 variable2=A_variable2 variable3=A_variable3 variable4=A_variable4
variable5=A_variable5 variable6=A_variable6 variable7=A_variable7 variable8=A_variable8) )
have (where=(criteria='B')
rename=(variable1=B_variable1 variable2=B_variable2 variable3=B_variable3 variable4=B_variable4
variable5=B_variable5 variable6=B_variable6 variable7=B_variable7 variable8=B_variable8) )
have (where=(criteria='C')
rename=(variable1=C_variable1 variable2=C_variable2 variable3=C_variable3 variable4=C_variable4
variable5=C_variable5 variable6=C_variable6 variable7=C_variable7 variable8=C_variable8) )
have (where=(criteria='D')
rename=(variable1=D_variable1 variable2=D_variable2 variable3=D_variable3 variable4=D_variable4
variable5=D_variable5 variable6=D_variable6 variable7=D_variable7 variable8=D_variable8) )
;
by uid;
run;
And if the task of typing all those renames is a problem, then the PROC SQL access to dictionary.columns can help you write those renames directly from the meta-data for data set HAVE:
proc sql noprint;
select distinct cats(name,'=A_',name),
cats(name,'=B_',name),
cats(name,'=C_',name),
cats(name,'=D_',name)
into :renamea separated by ' ',
:renameb separated by ' ',
:renamec separated by ' ',
:renamed separated by ' '
from dictionary.columns
where libname='WORK' and memname='HAVE' and not upcase(name) in('UID','CRITERIA');
quit;
data want1 (drop=criteria);
merge have (where=(criteria='A') rename=(&renamea))
have (where=(criteria='B') rename=(&renameb))
have (where=(criteria='C') rename=(&renamec))
have (where=(criteria='D') rename=(&renamed));
by uid;
run;
Both of these solutions also work perfectly.
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.