Hi All,
Question ,
Can I perform a combination of joins ( left joins and inner joins ) in hash using multiple tables ?
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A; do key = 1 to 9; retain A_value 100; A_value + 1; do _n_ = 1 to ceil(key/3); A_value + 0.01; output; end; A_value = int(A_value); end; run; data B; do key = 1 to 10 by 2; B_value = 200 + key; output; end; run; data C; do key = 1 to 5; retain C_value 300; C_value + 1; C_value + 0.01; output; C_value + 0.01; output; C_value = int(C_value); end; run; proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run; proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run; proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run; proc print noobs data=a_print; proc print noobs data=b_print; proc print noobs data=c_print; proc sql; create table reference as select a.key, a_value, b_value, c_value from a inner join b on a.key=b.key left join c on c.key = a.key order by a.key, a_value, b_value, c_value ; /* (A inner join B) left join C */ /* expected row count: * 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14 */ data D; if 0 then set a b c; declare hash HB(dataset:"B", multidata:'yes', ordered:'A'); HB.defineKey("key"); HB.defineData("b_value"); HB.defineDone(); declare hash HC(dataset:"C", multidata:'yes', ordered: 'A'); HC.defineKey("key"); HC.defineData("c_value"); HC.defineDone(); do until(exhausted); set A end = exhausted; _rc_b = HB.find(); if _rc_b ne 0 then continue; do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */ _rc_c = HC.find(); if _rc_c ne 0 then do; C_value = .; output; end; else do _cix_ = 1 by 1 until (_cix_ >= 1000); output; HC.has_next(result: _rc_cnext); if _rc_cnext = 0 then leave; HC.find_next(); end; HB.has_next(result: _rc_bnext); if _rc_bnext = 0 then leave; HB.find_next(); end; end; drop _:; stop; run;
Yes,
data A;
do x = 1 to 10;
a="A";
output;
end;
run;
data B;
do x = 1 to 10 by 2;
b="B";
output;
end;
run;
data C;
do x = 1 to 5;
c="C";
output;
end;
run;
/* (A inner join B) left join C */
data D;
if 0 then set a b c;
declare hash HB(dataset:"B");
HB.defineKey("x");
HB.defineData("b");
HB.defineDone();
declare hash HC(dataset:"C");
HC.defineKey("x");
HC.defineData("c");
HC.defineDone();
do until(end);
call missing (x,a,b,c);
set A end = end;
HC.find();
if HB.find()=0 then output;
end;
stop;
run;
proc print data =D;
run;
All the best
Bart
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A; do key = 1 to 9; retain A_value 100; A_value + 1; do _n_ = 1 to ceil(key/3); A_value + 0.01; output; end; A_value = int(A_value); end; run; data B; do key = 1 to 10 by 2; B_value = 200 + key; output; end; run; data C; do key = 1 to 5; retain C_value 300; C_value + 1; C_value + 0.01; output; C_value + 0.01; output; C_value = int(C_value); end; run; proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run; proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run; proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run; proc print noobs data=a_print; proc print noobs data=b_print; proc print noobs data=c_print; proc sql; create table reference as select a.key, a_value, b_value, c_value from a inner join b on a.key=b.key left join c on c.key = a.key order by a.key, a_value, b_value, c_value ; /* (A inner join B) left join C */ /* expected row count: * 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14 */ data D; if 0 then set a b c; declare hash HB(dataset:"B", multidata:'yes', ordered:'A'); HB.defineKey("key"); HB.defineData("b_value"); HB.defineDone(); declare hash HC(dataset:"C", multidata:'yes', ordered: 'A'); HC.defineKey("key"); HC.defineData("c_value"); HC.defineDone(); do until(exhausted); set A end = exhausted; _rc_b = HB.find(); if _rc_b ne 0 then continue; do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */ _rc_c = HC.find(); if _rc_c ne 0 then do; C_value = .; output; end; else do _cix_ = 1 by 1 until (_cix_ >= 1000); output; HC.has_next(result: _rc_cnext); if _rc_cnext = 0 then leave; HC.find_next(); end; HB.has_next(result: _rc_bnext); if _rc_bnext = 0 then leave; HB.find_next(); end; end; drop _:; stop; run;
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.