BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
Sanflo
Fluorite | Level 6

Hi all,

 

I have some code which I think could be improved through combining multiple DATA steps into one.

 

Currently I generate multiple tables where I keep specific values (the actual dataset has a lot more variables) where a specific criteria is met. After I then join them all together with the PROC SQL function based on a unique identifier so they can then be joined with other tables.

 

Below I've attached some sample data of how it currently operates. Any help would be appreciated.

 

Many thanks,

 

Sandy

 

data work.have;
	input UID :3. CRITERIA :$8. VARIABLE1 :$9. VARIABLE2 :$9. VARIABLE3 :$9. VARIABLE4 :$9. VARIABLE5 :$9. VARIABLE6 :$9. VARIABLE7 :$9. VARIABLE8 :$9.;
	infile datalines dlm=',';
	datalines;
1,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
1,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
1,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
1,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
1,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
2,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
2,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
2,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
2,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
2,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
3,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
3,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
3,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
3,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
3,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
4,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
4,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
4,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
4,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
4,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
5,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
5,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
5,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
5,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
5,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
;
run;

data work.data1;
	set work.have;
	keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
	where CRITERIA = 'A';
run;

data work.data2;
	set work.have;
	keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
	where CRITERIA = 'B';
run;

data work.data3;
	set work.have;
	keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
	where CRITERIA = 'C';
run;

data work.data4;
	set work.have;
	keep UID VARIABLE5 VARIABLE2 VARIABLE3;
	where CRITERIA = 'D';
run;

proc sql;
	create table work.want as 
		select t1.UID,
			t1.VARIABLE1 AS A_VAR1,
			t1.VARIABLE2 AS A_VAR2,
			t1.VARIABLE3 AS A_VAR3,
			t1.VARIABLE4 AS A_VAR4,
			t2.VARIABLE1 AS B_VAR1,
			t2.VARIABLE2 AS B_VAR2,
			t2.VARIABLE3 AS B_VAR3,
			t2.VARIABLE4 AS B_VAR4,
			t3.VARIABLE1 AS C_VAR1,
			t3.VARIABLE2 AS C_VAR2,
			t3.VARIABLE3 AS C_VAR3,
			t3.VARIABLE4 AS C_VAR4,
			t4.VARIABLE5 AS D_VAR5,
			t4.VARIABLE2 AS D_VAR2,
			t4.VARIABLE3 AS D_VAR3
		from work.data1 t1
			left join work.data2 t2 on (t2.UID=t1.UID)
			left join work.data3 t3 on (t3.UID=t1.UID)
			left join work.data4 t4 on (t4.UID=t1.UID);
quit;
1 ACCEPTED SOLUTION

Accepted Solutions
learsaas
Quartz | Level 8
proc sort data=work.have(keep=UID CRITERIA VARIABLE1-VARIABLE5) out=result;
 by UID;
 where CRITERIA in ('A','B','C','D');
run;
data result;
 set result;
 by UID;
 retain 
  A_VAR1-A_VAR4 
  B_VAR1-B_VAR4 
  C_VAR1-C_VAR4 
  D_VAR5 D_VAR2 D_VAR3
 ;
 array FromVar{*} VARIABLE1-VARIABLE5;
 array ToVar_A{*} $9 A_VAR1-A_VAR4; 
 array ToVar_B{*} $9 B_VAR1-B_VAR4; 
 array ToVar_C{*} $9 C_VAR1-C_VAR4; 
 array ToVar_D{*} $9 D_VAR5 D_VAR2 D_VAR3; 
 retain FLAG;
 if first.UID then do;
  call missing(of ToVar_A{*});
  call missing(of ToVar_B{*});
  call missing(of ToVar_C{*});
  call missing(of ToVar_D{*});
  FLAG=0;
 end;
 select;
  when(CRITERIA='A') do;
   do i=1 to dim(ToVar_A);ToVar_A[i]=FromVar[i];end;
   FLAG=1;
  end;
  when(CRITERIA='B') do;
   do i=1 to dim(ToVar_B);ToVar_B[i]=FromVar[i];end;
  end;
  when(CRITERIA='C') do;
   do i=1 to dim(ToVar_C);ToVar_C[i]=FromVar[i];end;
  end;
  when(CRITERIA='D') do;
   ToVar_D[1]=FromVar[5];
   do i=2 to dim(ToVar_D);ToVar_D[i]=FromVar[i];end;
  end;
  otherwise;
 end;
 if last.UID and FLAG;
 drop i FLAG CRITERIA VARIABLE1-VARIABLE5;
run;

View solution in original post

6 REPLIES 6
learsaas
Quartz | Level 8
proc sort data=work.have(keep=UID CRITERIA VARIABLE1-VARIABLE5) out=result;
 by UID;
 where CRITERIA in ('A','B','C','D');
run;
data result;
 set result;
 by UID;
 retain 
  A_VAR1-A_VAR4 
  B_VAR1-B_VAR4 
  C_VAR1-C_VAR4 
  D_VAR5 D_VAR2 D_VAR3
 ;
 array FromVar{*} VARIABLE1-VARIABLE5;
 array ToVar_A{*} $9 A_VAR1-A_VAR4; 
 array ToVar_B{*} $9 B_VAR1-B_VAR4; 
 array ToVar_C{*} $9 C_VAR1-C_VAR4; 
 array ToVar_D{*} $9 D_VAR5 D_VAR2 D_VAR3; 
 retain FLAG;
 if first.UID then do;
  call missing(of ToVar_A{*});
  call missing(of ToVar_B{*});
  call missing(of ToVar_C{*});
  call missing(of ToVar_D{*});
  FLAG=0;
 end;
 select;
  when(CRITERIA='A') do;
   do i=1 to dim(ToVar_A);ToVar_A[i]=FromVar[i];end;
   FLAG=1;
  end;
  when(CRITERIA='B') do;
   do i=1 to dim(ToVar_B);ToVar_B[i]=FromVar[i];end;
  end;
  when(CRITERIA='C') do;
   do i=1 to dim(ToVar_C);ToVar_C[i]=FromVar[i];end;
  end;
  when(CRITERIA='D') do;
   ToVar_D[1]=FromVar[5];
   do i=2 to dim(ToVar_D);ToVar_D[i]=FromVar[i];end;
  end;
  otherwise;
 end;
 if last.UID and FLAG;
 drop i FLAG CRITERIA VARIABLE1-VARIABLE5;
run;
Astounding
PROC Star

This revised version looks like a big improvement.  Did you have a question about it?

Reeza
Super User

@learsaas wrote:

https://communities.sas.com/t5/SAS-Programming/Row-to-variable/td-p/522615


Yup, still works as designed. 

 

Essentially what @Sanflo is doing is a transpose task. But their solution is hard coded for the values so doesn't generalize still. 

The solution posted, with the appropriate changes for the new data, still works. 

 

delete_transpose.PNG

mkeintz
PROC Star

Your are sorted by UID, so you can merge subsets of variables (renamed) by id, as in:

 

data work.have;
	input UID :3. CRITERIA :$8. VARIABLE1 :$9. VARIABLE2 :$9. VARIABLE3 :$9. VARIABLE4 :$9. VARIABLE5 :$9. VARIABLE6 :$9. VARIABLE7 :$9. VARIABLE8 :$9.;
	infile datalines dlm=',';
	datalines;
1,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
1,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
1,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
1,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
1,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
2,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
2,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
2,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
2,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
2,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
3,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
3,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
3,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
3,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
3,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
4,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
4,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
4,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
4,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
4,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
5,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
5,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
5,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
5,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
5,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
;
run;



data want (drop=criteria);
  merge have (where=(criteria='A') 
              rename=(variable1=A_variable1 variable2=A_variable2 variable3=A_variable3 variable4=A_variable4  
                      variable5=A_variable5 variable6=A_variable6 variable7=A_variable7 variable8=A_variable8)  )
        have (where=(criteria='B') 
              rename=(variable1=B_variable1 variable2=B_variable2 variable3=B_variable3 variable4=B_variable4
                      variable5=B_variable5 variable6=B_variable6 variable7=B_variable7 variable8=B_variable8)  )
        have (where=(criteria='C') 
              rename=(variable1=C_variable1 variable2=C_variable2 variable3=C_variable3 variable4=C_variable4
                      variable5=C_variable5 variable6=C_variable6 variable7=C_variable7 variable8=C_variable8)  )
        have (where=(criteria='D') 
              rename=(variable1=D_variable1 variable2=D_variable2 variable3=D_variable3 variable4=D_variable4
                      variable5=D_variable5 variable6=D_variable6 variable7=D_variable7 variable8=D_variable8)  )
       ;
  by uid;
run;

 

And if the task of typing all those renames is a problem, then the PROC SQL access to dictionary.columns can help you write those renames directly from the meta-data for data set HAVE:

 

proc sql noprint;
  select distinct cats(name,'=A_',name),
                  cats(name,'=B_',name),
                  cats(name,'=C_',name),
                  cats(name,'=D_',name)
		 into :renamea separated by ' ',
              :renameb separated by ' ',
              :renamec separated by ' ',
              :renamed separated by ' '
		 from dictionary.columns
		 where libname='WORK' and memname='HAVE' and not upcase(name) in('UID','CRITERIA');
quit;

data want1 (drop=criteria);
  merge have (where=(criteria='A') rename=(&renamea))
        have (where=(criteria='B') rename=(&renameb))
        have (where=(criteria='C') rename=(&renamec))
        have (where=(criteria='D') rename=(&renamed));
  by uid;
run;
--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------
Sanflo
Fluorite | Level 6

Both of these solutions also work perfectly.

SAS Innovate 2025: Register Now

Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 6 replies
  • 1213 views
  • 2 likes
  • 5 in conversation