Hi All,
Question ,
Can I perform a combination of joins ( left joins and inner joins ) in hash using multiple tables ?
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A;
do key = 1 to 9;
retain A_value 100;
A_value + 1;
do _n_ = 1 to ceil(key/3);
A_value + 0.01;
output;
end;
A_value = int(A_value);
end;
run;
data B;
do key = 1 to 10 by 2;
B_value = 200 + key;
output;
end;
run;
data C;
do key = 1 to 5;
retain C_value 300;
C_value + 1;
C_value + 0.01; output;
C_value + 0.01; output;
C_value = int(C_value);
end;
run;
proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run;
proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run;
proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run;
proc print noobs data=a_print;
proc print noobs data=b_print;
proc print noobs data=c_print;
proc sql;
create table reference as
select
a.key, a_value, b_value, c_value
from
a
inner join
b
on
a.key=b.key
left join
c
on
c.key = a.key
order by
a.key, a_value, b_value, c_value
;
/* (A inner join B) left join C */
/* expected row count:
* 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14
*/
data D;
if 0 then set a b c;
declare hash HB(dataset:"B", multidata:'yes', ordered:'A');
HB.defineKey("key");
HB.defineData("b_value");
HB.defineDone();
declare hash HC(dataset:"C", multidata:'yes', ordered: 'A');
HC.defineKey("key");
HC.defineData("c_value");
HC.defineDone();
do until(exhausted);
set A end = exhausted;
_rc_b = HB.find();
if _rc_b ne 0 then continue;
do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */
_rc_c = HC.find();
if _rc_c ne 0 then do;
C_value = .;
output;
end;
else
do _cix_ = 1 by 1 until (_cix_ >= 1000);
output;
HC.has_next(result: _rc_cnext);
if _rc_cnext = 0 then leave;
HC.find_next();
end;
HB.has_next(result: _rc_bnext);
if _rc_bnext = 0 then leave;
HB.find_next();
end;
end;
drop _:;
stop;
run;
Yes,
data A;
do x = 1 to 10;
a="A";
output;
end;
run;
data B;
do x = 1 to 10 by 2;
b="B";
output;
end;
run;
data C;
do x = 1 to 5;
c="C";
output;
end;
run;
/* (A inner join B) left join C */
data D;
if 0 then set a b c;
declare hash HB(dataset:"B");
HB.defineKey("x");
HB.defineData("b");
HB.defineDone();
declare hash HC(dataset:"C");
HC.defineKey("x");
HC.defineData("c");
HC.defineDone();
do until(end);
call missing (x,a,b,c);
set A end = end;
HC.find();
if HB.find()=0 then output;
end;
stop;
run;
proc print data =D;
run;
All the best
Bart
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A;
do key = 1 to 9;
retain A_value 100;
A_value + 1;
do _n_ = 1 to ceil(key/3);
A_value + 0.01;
output;
end;
A_value = int(A_value);
end;
run;
data B;
do key = 1 to 10 by 2;
B_value = 200 + key;
output;
end;
run;
data C;
do key = 1 to 5;
retain C_value 300;
C_value + 1;
C_value + 0.01; output;
C_value + 0.01; output;
C_value = int(C_value);
end;
run;
proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run;
proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run;
proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run;
proc print noobs data=a_print;
proc print noobs data=b_print;
proc print noobs data=c_print;
proc sql;
create table reference as
select
a.key, a_value, b_value, c_value
from
a
inner join
b
on
a.key=b.key
left join
c
on
c.key = a.key
order by
a.key, a_value, b_value, c_value
;
/* (A inner join B) left join C */
/* expected row count:
* 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14
*/
data D;
if 0 then set a b c;
declare hash HB(dataset:"B", multidata:'yes', ordered:'A');
HB.defineKey("key");
HB.defineData("b_value");
HB.defineDone();
declare hash HC(dataset:"C", multidata:'yes', ordered: 'A');
HC.defineKey("key");
HC.defineData("c_value");
HC.defineDone();
do until(exhausted);
set A end = exhausted;
_rc_b = HB.find();
if _rc_b ne 0 then continue;
do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */
_rc_c = HC.find();
if _rc_c ne 0 then do;
C_value = .;
output;
end;
else
do _cix_ = 1 by 1 until (_cix_ >= 1000);
output;
HC.has_next(result: _rc_cnext);
if _rc_cnext = 0 then leave;
HC.find_next();
end;
HB.has_next(result: _rc_bnext);
if _rc_bnext = 0 then leave;
HB.find_next();
end;
end;
drop _:;
stop;
run;
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.