Hi all, I have some code which I think could be improved through combining multiple DATA steps into one. Currently I generate multiple tables where I keep specific values (the actual dataset has a lot more variables) where a specific criteria is met. After I then join them all together with the PROC SQL function based on a unique identifier so they can then be joined with other tables. Below I've attached some sample data of how it currently operates. Any help would be appreciated. Many thanks, Sandy data work.have;
input UID :3. CRITERIA :$8. VARIABLE1 :$9. VARIABLE2 :$9. VARIABLE3 :$9. VARIABLE4 :$9. VARIABLE5 :$9. VARIABLE6 :$9. VARIABLE7 :$9. VARIABLE8 :$9.;
infile datalines dlm=',';
datalines;
1,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
1,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
1,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
1,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
1,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
2,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
2,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
2,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
2,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
2,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
3,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
3,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
3,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
3,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
3,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
4,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
4,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
4,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
4,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
4,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
5,A,AVALUE1,AVALUE2,AVALUE3,AVALUE4,AVALUE5,AVALUE6,AVALUE7,AVALUE8
5,B,BVALUE1,BVALUE2,BVALUE3,BVALUE4,BVALUE5,BVALUE6,BVALUE7,BVALUE8
5,C,CVALUE1,CVALUE2,CVALUE3,CVALUE4,CVALUE5,CVALUE6,CVALUE7,CVALUE8
5,D,DVALUE1,DVALUE2,DVALUE3,DVALUE4,DVALUE5,DVALUE6,DVALUE7,DVALUE8
5,E,EVALUE1,EVALUE2,EVALUE3,EVALUE4,EVALUE5,EVALUE6,EVALUE7,EVALUE8
;
run;
data work.data1;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'A';
run;
data work.data2;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'B';
run;
data work.data3;
set work.have;
keep UID VARIABLE1 VARIABLE2 VARIABLE3 VARIABLE4;
where CRITERIA = 'C';
run;
data work.data4;
set work.have;
keep UID VARIABLE5 VARIABLE2 VARIABLE3;
where CRITERIA = 'D';
run;
proc sql;
create table work.want as
select t1.UID,
t1.VARIABLE1 AS A_VAR1,
t1.VARIABLE2 AS A_VAR2,
t1.VARIABLE3 AS A_VAR3,
t1.VARIABLE4 AS A_VAR4,
t2.VARIABLE1 AS B_VAR1,
t2.VARIABLE2 AS B_VAR2,
t2.VARIABLE3 AS B_VAR3,
t2.VARIABLE4 AS B_VAR4,
t3.VARIABLE1 AS C_VAR1,
t3.VARIABLE2 AS C_VAR2,
t3.VARIABLE3 AS C_VAR3,
t3.VARIABLE4 AS C_VAR4,
t4.VARIABLE5 AS D_VAR5,
t4.VARIABLE2 AS D_VAR2,
t4.VARIABLE3 AS D_VAR3
from work.data1 t1
left join work.data2 t2 on (t2.UID=t1.UID)
left join work.data3 t3 on (t3.UID=t1.UID)
left join work.data4 t4 on (t4.UID=t1.UID);
quit;
... View more