BookmarkSubscribeRSS Feed
☑ This topic is solved. Need further help from the community? Please sign in and ask a new question.
Ronein
Meteorite | Level 14

Hello

I am using Hash method to merge data sets.

The by variables to merge have different names.

Here is the code I run that produce error.

In real life data sets are very big so I don't want to rename variables of data set VBM374_USED_BRANCH_CUSTOMER.

It means that the rename should be done during Hash process and not before.

Data ABT_ALL;
input lak_id snif X y Z;
cards;
111111 987 10 20 30
222222 921 15 30 45
333333 940 20 40 60
;
Run;


Data VBM374_USED_BRANCH_CUSTOMER;
input Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP;
cards;
222222 921 123 
333333 940 456
;
run;


data ABT_ALL_b;
set ABT_ALL;
if _n_ = 1
then do;
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
h.definekey("Branch_Cust_Nbr","Branch_Nbr"); 
h.definedata("Numerator_DWH");
h.definedone();
if 0 then set VBM374_USED_BRANCH_CUSTOMER (keep=Branch_Cust_Nbr  Branch_Nbr first_Branch_Cust_IP rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif first_Branch_Cust_IP=Numerator_DWH));
call missing(Numerator_DWH);
end;
rc = h.find();
drop rc;
run;

The error

1                                                          The SAS System                          07:19 Thursday, September 7, 2023

1          ;*';*";*/;quit;run;
2          OPTIONS PAGENO=MIN;
3          %LET _CLIENTTASKLABEL='Program (6)';
4          %LET _CLIENTPROCESSFLOWNAME='Process Flow';
5          %LET _CLIENTPROJECTPATH='';
6          %LET _CLIENTPROJECTPATHHOST='';
7          %LET _CLIENTPROJECTNAME='';
8          %LET _SASPROGRAMFILE='';
9          %LET _SASPROGRAMFILEHOST='';
10         
11         ODS _ALL_ CLOSE;
12         OPTIONS DEV=PNG;
13         GOPTIONS XPIXELS=0 YPIXELS=0;
14         FILENAME EGSR TEMP;
15         ODS tagsets.sasreport13(ID=EGSR) FILE=EGSR
16             STYLE=HTMLBlue
17             STYLESHEET=(URL="file:///C:/Program%20Files/SASHome/SASEnterpriseGuide/7.1/Styles/HTMLBlue.css")
18             NOGTITLE
19             NOGFOOTNOTE
20             GPATH=&sasworklocation
21             ENCODING=UTF8
22             options(rolap="on")
23         ;
NOTE: Writing TAGSETS.SASREPORT13(EGSR) Body file: EGSR
24         
25         GOPTIONS ACCESSIBLE;
26         data ABT_ALL_b;
27         set ABT_ALL;
28         if _n_ = 1
29         then do;
30         declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
31         h.definekey("Branch_Cust_Nbr","Branch_Nbr");
32         h.definedata("Numerator_DWH");
33         h.definedone();
34         if 0 then set VBM374_USED_BRANCH_CUSTOMER (keep=Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP
34       ! rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif first_Branch_Cust_IP=Numerator_DWH));
35         call missing(Numerator_DWH);
36         end;
37         rc = h.find();
38         drop rc;
39         run;

ERROR: Undeclared key symbol Branch_Cust_Nbr for hash object at line 33 column 1.
ERROR: DATA STEP Component Object failure.  Aborted during the EXECUTION phase.
NOTE: The SAS System stopped processing this step because of errors.
NOTE: There were 1 observations read from the data set WORK.ABT_ALL.
WARNING: The data set WORK.ABT_ALL_B may be incomplete.  When this step was stopped there were 0 observations and 6 variables.
NOTE: DATA statement used (Total process time):
      real time           0.03 seconds
      user cpu time       0.01 seconds
      system cpu time     0.01 seconds
      memory              810.68k
      OS Memory           26024.00k
      Timestamp           09/07/2023 01:18:27 PM
      Step Count                        168  Switch Count  2
      Page Faults                       0
      Page Reclaims                     172
      Page Swaps                        0
2                                                          The SAS System                          07:19 Thursday, September 7, 2023

      Voluntary Context Switches        44
      Involuntary Context Switches      1
      Block Input Operations            0
      Block Output Operations           0
      

40         
41         GOPTIONS NOACCESSIBLE;
42         %LET _CLIENTTASKLABEL=;
43         %LET _CLIENTPROCESSFLOWNAME=;
44         %LET _CLIENTPROJECTPATH=;
45         %LET _CLIENTPROJECTPATHHOST=;
46         %LET _CLIENTPROJECTNAME=;
47         %LET _SASPROGRAMFILE=;
48         %LET _SASPROGRAMFILEHOST=;
49         
50         ;*';*";*/;quit;run;
51         ODS _ALL_ CLOSE;
52         
53         
54         QUIT; RUN;
55         

 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Tom
Super User Tom
Super User

Why all of the renames?  Just use the KEY: option on the FIND() command to tell the hash object which variables have the key.

data ABT_ALL;
  input lak_id snif X y Z;
cards;
111111 987 10 20 30
222222 921 15 30 45
333333 940 20 40 60
;

data VBM374_USED_BRANCH_CUSTOMER;
  input Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP;
cards;
222222 921 123 
333333 940 456
;

data ABT_ALL_b;
  set ABT_ALL;
  if 0 then set VBM374_USED_BRANCH_CUSTOMER(keep=Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP);
  if _n_ = 1 then do;
    declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
    h.definekey("Branch_Cust_Nbr","Branch_Nbr"); 
    h.definedata("first_Branch_Cust_IP");
    h.definedone();
  end;
  if not h.find(key:lak_id,key:snif) then Numerator_DWH=first_Branch_Cust_IP;
  drop Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP ;
run;

proc print;
run;

Result

Obs    lak_id    snif     X     y     Z    Numerator_DWH

 1     111111     987    10    20    30         .
 2     222222     921    15    30    45       123
 3     333333     940    20    40    60       456

If you do want to rename them then do in the DATASET: option and the DEFINEKEY() and DEFINEDATA() options.

data ABT_ALL_b;
  set ABT_ALL;
  if _n_ = 1 then do;
    declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER
     (rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif
             first_Branch_Cust_IP=Numerator_DWH))"
    );
    h.definekey("lak_id","snif"); 
    h.definedata("Numerator_DWH");
    h.definedone();
  end;
  if h.find() then Numerator_DWH=.;
run;

View solution in original post

17 REPLIES 17
andreas_lds
Jade | Level 19

Move if 0 then set .... before declaring the hash.

Ronein
Meteorite | Level 14

No work,still error

ERROR: Undeclared key symbol Branch_Cust_Nbr for hash object at line 33 column 1.
ERROR: DATA STEP Component Object failure. Aborted during the EXECUTION phase.

 

data ABT_ALL_b;
set ABT_ALL;
if _n_ = 1 then do;
if 0 then set VBM374_USED_BRANCH_CUSTOMER (keep=Branch_Cust_Nbr  Branch_Nbr first_Branch_Cust_IP rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif first_Branch_Cust_IP=Numerator_DWH));
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
h.definekey("Branch_Cust_Nbr","Branch_Nbr"); 
h.definedata("Numerator_DWH");
h.definedone();
call missing(Numerator_DWH);
end;
rc = h.find();
drop rc;
run;
Amir
PROC Star

Hi,

 

It looks like you are renaming Branch_Cust_Nbr to lak_id, but later you refer back to Branch_Cust_Nbr. Try instead to refer to lak_id, and the same goes for any other variables you have renamed and then are referring to later.

 

You might also need to have a rename of the same variables when declaring the hash, if possible, otherwise maybe remove the rename.

 

 

Thanks & kind regards,

Amir.

 

Edit: Typo.

Edit2: Added second suggestion.

Ronein
Meteorite | Level 14
May you please show code Amir?
andreas_lds
Jade | Level 19

You are renaming Branch_Cust_Nbr to lak_id, so you can't use Branch_Cust_Nbr as key in the hash object

set ... rename=(Branch_Cust_Nbr=lak_id ...

 

Ronein
Meteorite | Level 14
But the rename was done on data set "VBM374_USED_BRANCH_CUSTOMER" and not on data set "ABT_ALL" . Here you wrote the rename on data set ABT_ALL
Kurt_Bremser
Super User

Your IF 0 THEN SET has a RENAME= dataset option, which means the variables are defined into the PDV with other names than those you use in the DEFINE methods.

Use the same options in the DECLARE statement, and use the RENAMEd names in the DEFINEs.

Ronein
Meteorite | Level 14

Do you mean that in  h.definekey and h.definedata I should use the renamed columns?

This code still have error

ERROR: Variable lak_id is not on file WORK.VBM374_USED_BRANCH_CUSTOMER.
ERROR: Hash data set load failed at line 33 column 1.
ERROR: DATA STEP Component Object failure. Aborted during the EXECUTION phase.

data WANT;
set ABT_ALL;
if _n_ = 1
then do;
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
h.definekey("lak_id","snif"); 
h.definedata("Numerator_DWH");
h.definedone();
if 0 then set VBM374_USED_BRANCH_CUSTOMER (keep=Branch_Cust_Nbr  Branch_Nbr first_Branch_Cust_IP
                                           rename=(Branch_Cust_Nbr=lak_id   Branch_Nbr=snif   first_Branch_Cust_IP=Numerator_DWH));
call missing(Numerator_DWH);
end;
rc = h.find();
drop rc;
run;
Amir
PROC Star

Hi @Ronein,

 

I hope it is clear what is required now, based on the follow up from @andreas_lds and @Kurt_Bremser.

 

When renaming variables for an incoming data set, e.g., old_name = new_name, you should refer to new_name in the rest of the data step.

 

If after making changes you're still getting issues then please post the log showing the data step that you've tried to fix and any notes, warnings or error messages, using the Insert Code icon "</>".

 

 

Thanks & kind regards,

Amir.

Tom
Super User Tom
Super User

Why all of the renames?  Just use the KEY: option on the FIND() command to tell the hash object which variables have the key.

data ABT_ALL;
  input lak_id snif X y Z;
cards;
111111 987 10 20 30
222222 921 15 30 45
333333 940 20 40 60
;

data VBM374_USED_BRANCH_CUSTOMER;
  input Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP;
cards;
222222 921 123 
333333 940 456
;

data ABT_ALL_b;
  set ABT_ALL;
  if 0 then set VBM374_USED_BRANCH_CUSTOMER(keep=Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP);
  if _n_ = 1 then do;
    declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
    h.definekey("Branch_Cust_Nbr","Branch_Nbr"); 
    h.definedata("first_Branch_Cust_IP");
    h.definedone();
  end;
  if not h.find(key:lak_id,key:snif) then Numerator_DWH=first_Branch_Cust_IP;
  drop Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP ;
run;

proc print;
run;

Result

Obs    lak_id    snif     X     y     Z    Numerator_DWH

 1     111111     987    10    20    30         .
 2     222222     921    15    30    45       123
 3     333333     940    20    40    60       456

If you do want to rename them then do in the DATASET: option and the DEFINEKEY() and DEFINEDATA() options.

data ABT_ALL_b;
  set ABT_ALL;
  if _n_ = 1 then do;
    declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER
     (rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif
             first_Branch_Cust_IP=Numerator_DWH))"
    );
    h.definekey("lak_id","snif"); 
    h.definedata("Numerator_DWH");
    h.definedone();
  end;
  if h.find() then Numerator_DWH=.;
run;
Ronein
Meteorite | Level 14

Great great,

So SAS knows to connect Branch_Cust_Nbr with lak_id and Branch_Nbr  with snif?

Ronein_0-1694119522374.png

I also want to ask please- Is Hash method Sort the data (in the background)?

What is the advantage if using this method for merging data sets?

Tom
Super User Tom
Super User

@Ronein wrote:

Great great,

So SAS knows to connect Branch_Cust_Nbr with lak_id and Branch_Nbr  with snif?

Ronein_0-1694119522374.png

I also want to ask please- Is Hash method Sort the data (in the background)?

What is the advantage if using this method for merging data sets?


It does not so much sort the data has put into bins to make it easier to find.

 

If the datasets are already sorted (and why aren't they) then the fastest method is a normal data step MERGE.

 

If one of the datasets is small enough to fit into memory than loading it into a HASH object means you do not have to sort the other (larger) dataset.  That can be doubly important if you need that dataset sorted in some other way for you later operations.

Ronein
Meteorite | Level 14

Thanks friends,

Here is the summary of the solutions:

Data ABT_ALL;
input lak_id snif X y Z;
cards;
111111 987 10 20 30
222222 921 15 30 45
333333 940 20 40 60
;
Run;

Data VBM374_USED_BRANCH_CUSTOMER;
input Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP;
cards;
222222 921 123 
333333 940 456
;
run;

/************Left join*****************************************/
/************Left join*****************************************/
/************Left join*****************************************/
data WAY1;
set ABT_ALL;
if _n_ = 1 then do;
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER
                       (rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif
                        first_Branch_Cust_IP=Numerator_DWH))");
h.definekey("lak_id","snif"); 
h.definedata("Numerator_DWH");
h.definedone();
end;
if h.find() then Numerator_DWH=.;
run;


data WAY2;
set ABT_ALL;
if _n_ = 1
then do;
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER
                       (rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif
                        first_Branch_Cust_IP=Numerator_DWH))");
h.definekey("lak_id","snif"); 
h.definedata("Numerator_DWH");
h.definedone();
if 0 then set VBM374_USED_BRANCH_CUSTOMER(rename=(Branch_Cust_Nbr=lak_id Branch_Nbr=snif first_Branch_Cust_IP=Numerator_DWH));
call missing(Numerator_DWH);
end;
rc = h.find();
drop rc;
run;



data WAY3;
set ABT_ALL;
if 0 then set VBM374_USED_BRANCH_CUSTOMER(keep=Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP);
if _n_ = 1 then do;
declare hash h (dataset:"VBM374_USED_BRANCH_CUSTOMER");
h.definekey("Branch_Cust_Nbr","Branch_Nbr"); 
h.definedata("first_Branch_Cust_IP");
h.definedone();
end;
if not h.find(key:lak_id,key:snif) then Numerator_DWH=first_Branch_Cust_IP;
drop Branch_Cust_Nbr Branch_Nbr first_Branch_Cust_IP ;
run;

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 17 replies
  • 2634 views
  • 7 likes
  • 5 in conversation