/* i want to use min and max in my key to join through HASH like in SQL :
min(t1.keyvar,5) = t2.keyvar)
*/
/* i have 45 to 49 in my left table and i have 50 in the table i want to join */
/* i am using the following code in my hash left join code for that , but when i do that the "varkey" column is not the original one ( not the orignal value) in my result table , the max replace the original value (here 50 ) */
data want (drop=rc);
/*iterate left data set*/
set have ;
/*declare variables from hash set*/
length b rc 8 ;
/*declare hash*/
if _n_=1 then
do;
declare hash hhh(dataset: "tableb (keep=keyvar b)", multidata:'y');
hhh.DefineKey('keyvar');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set have end=Lastobs;
call missing(b);
rc = hhh.find(key:max(50,min(keyvar,55)));
if rc ne 0 then
do;
var=.
output;
end;
do while (rc=0);
var =a*b;
output;
rc = hhh.find_next();
end;
end;
run;
/* i think the reason is because i put ALL:YES in definedata , if i remove AGE i have the table that i expect , is there a way/ option to put all but only remove a list of variable in the definedata() ? */
/* thank everyone for your help i appreciate 🙂 */
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
Can you post some example data and what you want the desired result to look like from that data?
Also, you are missing a semicolon setting var to missing. And proper indentation doesn't hurt either 🙂
data want (drop=rc);
set have;
length b rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "tableb (keep=keyvar b)", multidata:'y');
hhh.DefineKey('keyvar');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set have end=Lastobs;
call missing(b);
rc = hhh.find(key:max(50,min(keyvar,55)));
if rc ne 0 then do;
var=.;
output;
end;
do while (rc=0);
var =a*b;
output;
rc = hhh.find_next();
end;
end;
run;
/* i want to replicate the followinf sql code in hash * /
proc sql;
create table want as
select a.*,b.coef
from bigtable as a
left join smalltable (keep=AGE coef) as b
on max(50,min(a.AGE,55))=b.AGE
;
quit;
Hi @bebess Can you post sample(example data) that @PeterClemmensen requested to work with
I don't know about others. Pardon me, I am too lazy
DATA smalltable;
INPUT age coef ;
DATALINES ;
50 1
51 2
52 3
53 4
54 5
55 6
;
RUN;
DATA bigtable;
INPUT age ;
DATALINES ;
41 1
41 2
42 3
43 4
44 5
45 6
;
RUN;
/* sql : result i want to have */
proc sql;
create table want as
select a.*,b.coef
from bigtable as a
left join smalltable (keep=AGE coef) as b
on max(50,min(a.AGE,55))=b.AGE
;
quit;
/* Test: how to do in hash , I HAVE age=50 in the result table whereas i expect to have 41 to 45*/
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData(ALL:'YES');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
/* i think the reason is because i put ALL:YES in definedata , if i remove AGE i have the table that i expect , is there a way/ option to put all but only remove a list of variable in the definedata() ? */
/* thank everyone for your help i appreciate 🙂 */
data want (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
data want2 (drop=rc);
set bigtable;
length coef rc 8;
if _n_=1 then do;
declare hash hhh(dataset: "smalltable (keep=AGE coef)", multidata:'y');
hhh.DefineKey('AGE');
hhh.DefineData('coef');
hhh.DefineDone();
end;
do until(Lastobs);
set bigtable end=Lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(AGE,55)));
if rc ne 0 then do;
var_calculated=.;
output;
end;
do while (rc=0);
var_calculated =AGE*coef;
output;
rc = hhh.find_next();
end;
end;
run;
What about renaming AGE from smalltable?
data want(drop=rc _age);
length _age coef 8;
if _n_=1 then do;
declare hash hhh(dataset:'smalltable(keep=age coef rename=(age=_age))', multidata:'y');
hhh.definekey('_age');
hhh.definedata(all:'y');
hhh.definedone();
end;
do until(lastobs);
set bigtable end=lastobs;
call missing(_age,coef);
rc = hhh.find(key:max(50,min(age,55)));
if rc ne 0 then output;
else do while(rc=0);
output;
rc = hhh.find_next();
end;
end;
run;
Edit: Inserted "else" for a (marginal) performance improvement.
@bebess wrote:
rc = hhh.find(key:max(50,min(AGE,55)));
By the way, missing values of AGE in bigtable would be matched with observations from smalltable with AGE=55. I hope this is what you intended (or there are no obs. with missing AGE in bigtable).
@bebess wrote:I HAVE age=50 in the result table whereas i expect to have 41 to 45
Hi @bebess,
This is because you overwrite the age values with those from SMALLTABLE. To avoid this you can restrict the data items of the hash object to 'coef':
data want(drop=rc);
length coef 8;
if _n_=1 then do;
declare hash hhh(dataset:'smalltable(keep=age coef)', multidata:'y');
hhh.definekey('age');
hhh.definedata('coef');
hhh.definedone();
end;
do until(lastobs);
set bigtable end=lastobs;
call missing(coef);
rc = hhh.find(key:max(50,min(age,55)));
if rc ne 0 then output;
do while(rc=0);
output;
rc = hhh.find_next();
end;
end;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.