/* i want to use min and max in my key to join through HASH like in SQL :
min(t1.keyvar,5) = t2.keyvar)
*/
/* i have 45 to 49 in my left table and i have 50 in the table i want to join */
/* i am using the following code in my hash left join code for that , but when i do that the "varkey" column is not the original one ( not the orignal value) in my result table , the max replace the original value (here 50 ) */
data want (drop=rc);
/*iterate left data set*/
set have ;
/*declare variables from hash set*/
length b rc 8 ;
/*declare hash*/
if _n_=1 then
do;
declare hash hhh(dataset: "tableb (keep=keyvar b)", multidata:'y');
hhh.DefineKey('keyvar');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set have end=Lastobs;
call missing(b);
rc = hhh.find(key:max(50,min(keyvar,55)));
if rc ne 0 then
do;
var=.
output;
end;
do while (rc=0);
var =a*b;
output;
rc = hhh.find_next();
end;
end;
run;
/* i think the reason is because i put ALL:YES in definedata , if i remove AGE i have the table that i expect , is there a way/ option to put all but only remove a list of variable in the definedata() ? */
/* thank everyone for your help i appreciate 🙂 */
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
Can you post some example data and what you want the desired result to look like from that data?
Also, you are missing a semicolon setting var to missing. And proper indentation doesn't hurt either 🙂
data want (drop=rc);
set have;
length b rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "tableb (keep=keyvar b)", multidata:'y');
hhh.DefineKey('keyvar');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set have end=Lastobs;
call missing(b);
rc = hhh.find(key:max(50,min(keyvar,55)));
if rc ne 0 then do;
var=.;
output;
end;
do while (rc=0);
var =a*b;
output;
rc = hhh.find_next();
end;
end;
run;
/* i want to replicate the followinf sql code in hash * /
proc sql;
create table want as
select a.*,b.coef
from bigtable as a
left join smalltable (keep=AGE coef) as b
on max(50,min(a.AGE,55))=b.AGE
;
quit;
Hi @bebess Can you post sample(example data) that @PeterClemmensen requested to work with
I don't know about others. Pardon me, I am too lazy
DATA smalltable;
INPUT age coef ;
DATALINES ;
50 1
51 2
52 3
53 4
54 5
55 6
;
RUN;
DATA bigtable;
INPUT age ;
DATALINES ;
41 1
41 2
42 3
43 4
44 5
45 6
;
RUN;
/* sql : result i want to have */
proc sql;
create table want as
select a.*,b.coef
from bigtable as a
left join smalltable (keep=AGE coef) as b
on max(50,min(a.AGE,55))=b.AGE
;
quit;
/* Test: how to do in hash , I HAVE age=50 in the result table whereas i expect to have 41 to 45*/
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
/* i think the reason is because i put ALL:YES in definedata , if i remove AGE i have the table that i expect , is there a way/ option to put all but only remove a list of variable in the definedata() ? */
/* thank everyone for your help i appreciate 🙂 */
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
data want2 (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
What about renaming AGE from smalltable?
data want(drop=rc _age);
length _age coef 8;
if _n_=1 then do;
declare hash hhh(dataset:'smalltable(keep=age coef rename=(age=_age))', multidata:'y');
hhh.definekey('_age');
hhh.definedata(all:'y');
hhh.definedone();
end;
do until(lastobs);
set bigtable end=lastobs;
call missing(_age,coef);
rc = hhh.find(key:max(50,min(age,55)));
if rc ne 0 then output;
else do while(rc=0);
output;
rc = hhh.find_next();
end;
end;
run;
Edit: Inserted "else" for a (marginal) performance improvement.
@bebess wrote:
rc = hhh.find(key:max(50,min(AGE,55)));
By the way, missing values of AGE in bigtable would be matched with observations from smalltable with AGE=55. I hope this is what you intended (or there are no obs. with missing AGE in bigtable).
@bebess wrote:I HAVE age=50 in the result table whereas i expect to have 41 to 45
Hi @bebess,
This is because you overwrite the age values with those from SMALLTABLE. To avoid this you can restrict the data items of the hash object to 'coef':
data want(drop=rc);
length coef 8;
if _n_=1 then do;
declare hash hhh(dataset:'smalltable(keep=age coef)', multidata:'y');
hhh.definekey('age');
hhh.definedata('coef');
hhh.definedone();
end;
do until(lastobs);
set bigtable end=lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(age,55)));
if rc ne 0 then output;
do while(rc=0);
output;
rc = hhh.find_next();
end;
end;
run;
April 27 – 30 | Gaylord Texan | Grapevine, Texas
Walk in ready to learn. Walk out ready to deliver. This is the data and AI conference you can't afford to miss.
Register now and lock in 2025 pricing—just $495!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.