Hi All,
Question ,
Can I perform a combination of joins ( left joins and inner joins ) in hash using multiple tables ?
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A; do key = 1 to 9; retain A_value 100; A_value + 1; do _n_ = 1 to ceil(key/3); A_value + 0.01; output; end; A_value = int(A_value); end; run; data B; do key = 1 to 10 by 2; B_value = 200 + key; output; end; run; data C; do key = 1 to 5; retain C_value 300; C_value + 1; C_value + 0.01; output; C_value + 0.01; output; C_value = int(C_value); end; run; proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run; proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run; proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run; proc print noobs data=a_print; proc print noobs data=b_print; proc print noobs data=c_print; proc sql; create table reference as select a.key, a_value, b_value, c_value from a inner join b on a.key=b.key left join c on c.key = a.key order by a.key, a_value, b_value, c_value ; /* (A inner join B) left join C */ /* expected row count: * 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14 */ data D; if 0 then set a b c; declare hash HB(dataset:"B", multidata:'yes', ordered:'A'); HB.defineKey("key"); HB.defineData("b_value"); HB.defineDone(); declare hash HC(dataset:"C", multidata:'yes', ordered: 'A'); HC.defineKey("key"); HC.defineData("c_value"); HC.defineDone(); do until(exhausted); set A end = exhausted; _rc_b = HB.find(); if _rc_b ne 0 then continue; do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */ _rc_c = HC.find(); if _rc_c ne 0 then do; C_value = .; output; end; else do _cix_ = 1 by 1 until (_cix_ >= 1000); output; HC.has_next(result: _rc_cnext); if _rc_cnext = 0 then leave; HC.find_next(); end; HB.has_next(result: _rc_bnext); if _rc_bnext = 0 then leave; HB.find_next(); end; end; drop _:; stop; run;
Yes,
data A;
do x = 1 to 10;
a="A";
output;
end;
run;
data B;
do x = 1 to 10 by 2;
b="B";
output;
end;
run;
data C;
do x = 1 to 5;
c="C";
output;
end;
run;
/* (A inner join B) left join C */
data D;
if 0 then set a b c;
declare hash HB(dataset:"B");
HB.defineKey("x");
HB.defineData("b");
HB.defineDone();
declare hash HC(dataset:"C");
HC.defineKey("x");
HC.defineData("c");
HC.defineDone();
do until(end);
call missing (x,a,b,c);
set A end = end;
HC.find();
if HB.find()=0 then output;
end;
stop;
run;
proc print data =D;
run;
All the best
Bart
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A; do key = 1 to 9; retain A_value 100; A_value + 1; do _n_ = 1 to ceil(key/3); A_value + 0.01; output; end; A_value = int(A_value); end; run; data B; do key = 1 to 10 by 2; B_value = 200 + key; output; end; run; data C; do key = 1 to 5; retain C_value 300; C_value + 1; C_value + 0.01; output; C_value + 0.01; output; C_value = int(C_value); end; run; proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run; proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run; proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run; proc print noobs data=a_print; proc print noobs data=b_print; proc print noobs data=c_print; proc sql; create table reference as select a.key, a_value, b_value, c_value from a inner join b on a.key=b.key left join c on c.key = a.key order by a.key, a_value, b_value, c_value ; /* (A inner join B) left join C */ /* expected row count: * 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14 */ data D; if 0 then set a b c; declare hash HB(dataset:"B", multidata:'yes', ordered:'A'); HB.defineKey("key"); HB.defineData("b_value"); HB.defineDone(); declare hash HC(dataset:"C", multidata:'yes', ordered: 'A'); HC.defineKey("key"); HC.defineData("c_value"); HC.defineDone(); do until(exhausted); set A end = exhausted; _rc_b = HB.find(); if _rc_b ne 0 then continue; do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */ _rc_c = HC.find(); if _rc_c ne 0 then do; C_value = .; output; end; else do _cix_ = 1 by 1 until (_cix_ >= 1000); output; HC.has_next(result: _rc_cnext); if _rc_cnext = 0 then leave; HC.find_next(); end; HB.has_next(result: _rc_bnext); if _rc_bnext = 0 then leave; HB.find_next(); end; end; drop _:; stop; run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.