Hi All,
Question ,
Can I perform a combination of joins ( left joins and inner joins ) in hash using multiple tables ?
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A;
do key = 1 to 9;
retain A_value 100;
A_value + 1;
do _n_ = 1 to ceil(key/3);
A_value + 0.01;
output;
end;
A_value = int(A_value);
end;
run;
data B;
do key = 1 to 10 by 2;
B_value = 200 + key;
output;
end;
run;
data C;
do key = 1 to 5;
retain C_value 300;
C_value + 1;
C_value + 0.01; output;
C_value + 0.01; output;
C_value = int(C_value);
end;
run;
proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run;
proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run;
proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run;
proc print noobs data=a_print;
proc print noobs data=b_print;
proc print noobs data=c_print;
proc sql;
create table reference as
select
a.key, a_value, b_value, c_value
from
a
inner join
b
on
a.key=b.key
left join
c
on
c.key = a.key
order by
a.key, a_value, b_value, c_value
;
/* (A inner join B) left join C */
/* expected row count:
* 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14
*/
data D;
if 0 then set a b c;
declare hash HB(dataset:"B", multidata:'yes', ordered:'A');
HB.defineKey("key");
HB.defineData("b_value");
HB.defineDone();
declare hash HC(dataset:"C", multidata:'yes', ordered: 'A');
HC.defineKey("key");
HC.defineData("c_value");
HC.defineDone();
do until(exhausted);
set A end = exhausted;
_rc_b = HB.find();
if _rc_b ne 0 then continue;
do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */
_rc_c = HC.find();
if _rc_c ne 0 then do;
C_value = .;
output;
end;
else
do _cix_ = 1 by 1 until (_cix_ >= 1000);
output;
HC.has_next(result: _rc_cnext);
if _rc_cnext = 0 then leave;
HC.find_next();
end;
HB.has_next(result: _rc_bnext);
if _rc_bnext = 0 then leave;
HB.find_next();
end;
end;
drop _:;
stop;
run;
Yes,
data A;
do x = 1 to 10;
a="A";
output;
end;
run;
data B;
do x = 1 to 10 by 2;
b="B";
output;
end;
run;
data C;
do x = 1 to 5;
c="C";
output;
end;
run;
/* (A inner join B) left join C */
data D;
if 0 then set a b c;
declare hash HB(dataset:"B");
HB.defineKey("x");
HB.defineData("b");
HB.defineDone();
declare hash HC(dataset:"C");
HC.defineKey("x");
HC.defineData("c");
HC.defineDone();
do until(end);
call missing (x,a,b,c);
set A end = end;
HC.find();
if HB.find()=0 then output;
end;
stop;
run;
proc print data =D;
run;
All the best
Bart
Be careful though. They are not the same as a SQL inner join when the join criteria contain an N to M match which should produce NxM rows. The minimal (simplest) HASH declaration is for a unique keyed data set, so the hash join scenarios are for related data with 1:1 or N:1 mappings.
A complete replication of joining would need to deal with multidata:'yes' and cross looping.
Example:
data A;
do key = 1 to 9;
retain A_value 100;
A_value + 1;
do _n_ = 1 to ceil(key/3);
A_value + 0.01;
output;
end;
A_value = int(A_value);
end;
run;
data B;
do key = 1 to 10 by 2;
B_value = 200 + key;
output;
end;
run;
data C;
do key = 1 to 5;
retain C_value 300;
C_value + 1;
C_value + 0.01; output;
C_value + 0.01; output;
C_value = int(C_value);
end;
run;
proc transpose data=a out=a_print(drop=_name_) prefix=A_value; by key; run;
proc transpose data=b out=b_print(drop=_name_) prefix=B_value; by key; run;
proc transpose data=c out=c_print(drop=_name_) prefix=C_value; by key; run;
proc print noobs data=a_print;
proc print noobs data=b_print;
proc print noobs data=c_print;
proc sql;
create table reference as
select
a.key, a_value, b_value, c_value
from
a
inner join
b
on
a.key=b.key
left join
c
on
c.key = a.key
order by
a.key, a_value, b_value, c_value
;
/* (A inner join B) left join C */
/* expected row count:
* 1:1x1x2 + 3:1x1x2 + 5:2x1x2 + 7:3x1 + 9:3x1 = 14
*/
data D;
if 0 then set a b c;
declare hash HB(dataset:"B", multidata:'yes', ordered:'A');
HB.defineKey("key");
HB.defineData("b_value");
HB.defineDone();
declare hash HC(dataset:"C", multidata:'yes', ordered: 'A');
HC.defineKey("key");
HC.defineData("c_value");
HC.defineDone();
do until(exhausted);
set A end = exhausted;
_rc_b = HB.find();
if _rc_b ne 0 then continue;
do _bix_ = 1 by 1 until (_bix_ >= 1000); /* simulate SQL LOOPS= option */
_rc_c = HC.find();
if _rc_c ne 0 then do;
C_value = .;
output;
end;
else
do _cix_ = 1 by 1 until (_cix_ >= 1000);
output;
HC.has_next(result: _rc_cnext);
if _rc_cnext = 0 then leave;
HC.find_next();
end;
HB.has_next(result: _rc_bnext);
if _rc_bnext = 0 then leave;
HB.find_next();
end;
end;
drop _:;
stop;
run;
April 27 – 30 | Gaylord Texan | Grapevine, Texas
Walk in ready to learn. Walk out ready to deliver. This is the data and AI conference you can't afford to miss.
Register now and lock in 2025 pricing—just $495!
Still thinking about your presentation idea? The submission deadline has been extended to Friday, Nov. 14, at 11:59 p.m. ET.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.