I have three tables which I want to join: tab1 - about 55 millions rows (size 25 GB) tab2 - 2,5 millions rows tab3 - 1,5 millions rows proc sql looks like: proc sql;
create table tab4 as
select
t1.*
,t2.col_2
,coalesce(t1.col_3, t3.col_3) as col_3_new
,coalesce(t1.col_4, t3.col_4) as col_4_new
from tab1 t1
left join tab2 t2 on t1.id = t2.id
left join tab3 t3
on t1.id = t3.id
and t1.id_1 = t3.id_1
and t1.id_2 = t3.id_2
;
quit;
data tab4;
set tab4;
drop col_3 col_4;
rename col_3_new = col_3 col_4_new = col_4;
run; I tried to use hash tables instead of proc sql, but I don't know if I do right and how to include coalesce(): data tab4;
if _n_=1 then
do;
/* map tables */
if 0 then set tab1 tab2 tab3;
/* define and load hashes */
dcl hash h_b(dataset:'tab2');
h_b.defineKey('id');
h_b.defineData('col_2');
h_b.defineDone();
dcl hash h_c(dataset:'tab3');
h_c.defineKey('id', 'id_1', 'id_2');
h_c.defineData('col_3', 'col_4');
h_c.defineDone();
end;
call missing(of _all_);
set tab1;
_rc=h_b.find(key: id);
_rc=h_c.find(key: id, key:id_1, key:id_2);
run; Maybe is it better method to join these tables?
... View more