<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Inner join using hash in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669835#M200993</link>
    <description>&lt;P&gt;Make your life easier by using an intelligent data structure:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data market_db;
input @1 ID :$4. @6 ID2 :$8. @15 py1 $10. @26 py2 $10. @37 py3 $10. @48 py4 $10.;
infile cards missover ;
cards;
1325 56782322 5872584458 2478569877 8987786548
5489 58654851 8658656767                       2795003597  
3287 29365474 9831765765 5458456844           
8799 89745434 4897568698 3697489567 3265987465
;

data good_numbers;
input ID :$8. confirm_phn :$10.;
infile cards;
cards;
56782322 5872584458
5489     2795003597
89745434 4897568698
2587     5872584458
9876     9831765765
8799     3697489567
;

proc transpose
  data=market_db
  out=long (
    rename=(col1=phn)
    where=(phn ne "")
  )
;
by id notsorted id2;
var py:;
run;

data market_db_intelligent;
set long;
py_type = substr(_name_,3);
drop _name_;
run;

data want;
set market_db_intelligent;
if _n_ = 1
then do;
  length cid $8;
  declare hash good (dataset:"good_numbers (rename=(id=cid confirm_phn=phn))");
  good.definekey('cid','phn');
  good.definedone();
end;
if good.check(key:id2,key:phn) = 0 or good.check(key:id,key:phn) = 0;
drop cid;
run;
  &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;You can see how transposing to a long dataset layout makes coding simple.&lt;/P&gt;</description>
    <pubDate>Thu, 16 Jul 2020 08:42:09 GMT</pubDate>
    <dc:creator>Kurt_Bremser</dc:creator>
    <dc:date>2020-07-16T08:42:09Z</dc:date>
    <item>
      <title>Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669700#M200924</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have a dataset name market DB which has four phone columns. I want to join this dataset with&amp;nbsp;good_numbers on ID OR ID2 column and keep only matching phone numbers in my output.&lt;BR /&gt;So for example for ID2 &amp;nbsp;56782322, I have a matching phone in good_numbers table so i want to bring in that records. For some ID's there will be more than one match(example -8799)&lt;/P&gt;
&lt;P&gt;I am trying to use hashing but not getting any records back. Could someone please help? Thanks.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Code:&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data market_db;
input @1 ID :$4. @6 ID2 :$8. @15 py1 $10. @26 py22 $10. @37 py3 $10.  @48 py4 $10.;
infile cards missover ;
cards;
1325 56782322 5872584458 2478569877 8987786548
5489 58654851 8658656767                       2795003597  
3287 29365474 9831765765 5458456844           
8799 89745434 4897568698 3697489567 3265987465
;
run;	



Data good_numbers;
input ID :$8. confirm_phn :$10.;
infile cards;
cards;
56782322 5872584458
5489     2795003597
89745434 4897568698
2587     5872584458
9876     9831765765
8799     3697489567
;
Quit;


data want;
set market_db;
array phn py1-py4;
if _N_=1 then do;
dcl hash H(dataset:'good_numbers');
H.definekey('ID','confirm_phn');
H.definedone();
if 0 then set good_numbers;
end;
do over phn;
 confirm_phn=phn;
 if H.find() = 0;
end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Desired output:&lt;/STRONG&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;TABLE width="423"&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD width="64"&gt;ID&lt;/TD&gt;
&lt;TD width="64"&gt;ID2&lt;/TD&gt;
&lt;TD width="77"&gt;py1&lt;/TD&gt;
&lt;TD width="77"&gt;py2&lt;/TD&gt;
&lt;TD width="64"&gt;py3&lt;/TD&gt;
&lt;TD width="77"&gt;py4&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;1325&lt;/TD&gt;
&lt;TD&gt;56782322&lt;/TD&gt;
&lt;TD&gt;5872584458&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;5489&lt;/TD&gt;
&lt;TD&gt;58654851&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;2795003597&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;8799&lt;/TD&gt;
&lt;TD&gt;89745434&lt;/TD&gt;
&lt;TD&gt;4897568698&lt;/TD&gt;
&lt;TD&gt;3697489567&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jul 2020 20:12:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669700#M200924</guid>
      <dc:creator>vicky07</dc:creator>
      <dc:date>2020-07-15T20:12:54Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669723#M200935</link>
      <description>&lt;P&gt;There are several problems.&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;You have to tell SAS which key values you're going to use to search in the hash object. Since you are searching both ID and ID2, you have to explicitly list the key values in the find() method.&lt;/LI&gt;
&lt;LI&gt;The length of both ID variables has to be the match the length of your ID variable in your hash object. I didn't include it below, but I set the length of ID in your market_db dataset to $8.&lt;/LI&gt;
&lt;LI&gt;Once you find a match, you need to tell the data step what to do. I used a output and leave statement.&lt;/LI&gt;
&lt;/OL&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
set market_db;
array phn py1-py4;
if _N_=1 then do;
    dcl hash H(dataset:'good_numbers');
    H.definekey('ID','confirm_phn');
    H.definedata('confirm_phn');
    H.definedone();
    if 0 then set good_numbers;
end;
do over phn;
    if H.find(key:ID, key:phn) = 0 or h.find(key:id2, key:phn) = 0 then do;
        output;
        leave; 
    end;
end;
call missing(confirm_phn);
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 15 Jul 2020 21:29:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669723#M200935</guid>
      <dc:creator>ketpt42</dc:creator>
      <dc:date>2020-07-15T21:29:32Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669725#M200937</link>
      <description>&lt;P&gt;Two issues:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1. Read your log. Read your log. Read your log. Read your log. Read your log. Read your log. Read your log. Read your log.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;The length of ID in the WANT data set is 4. Move the&amp;nbsp;&amp;nbsp;&lt;FONT face="courier new,courier"&gt;set&lt;/FONT&gt;&amp;nbsp; statement after the hash table creation.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;Always read your log. Even if there is no warnings and nothing seems wrong. There is a warning here. Always.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2.&amp;nbsp;The line&amp;nbsp; &lt;FONT face="courier new,courier"&gt;&amp;nbsp;if H.find() = 0;&amp;nbsp;&lt;/FONT&gt; &amp;nbsp; stops all processing for that observation if the test result is &lt;EM&gt;false&lt;/EM&gt;. Because no value is found in PY1, PY2 is never tested.&lt;/P&gt;
&lt;P&gt;You need to learn to debug this kind of things. For example you can add the line&amp;nbsp; &lt;FONT face="courier new,courier"&gt;putlog PHN=;&amp;nbsp;&lt;/FONT&gt; &amp;nbsp;in the&amp;nbsp;&amp;nbsp;&lt;FONT face="courier new,courier"&gt;do over&amp;nbsp;&lt;/FONT&gt; loop.&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jul 2020 22:30:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669725#M200937</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2020-07-15T22:30:52Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669728#M200939</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/278672"&gt;@ketpt42&lt;/a&gt;&amp;nbsp;Thanks for your response. When i run your solution I am getting the below error message. Could you please help?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;WARNING: Multiple lengths were specified for the variable ID by input data set(s). This can cause truncation of data.&lt;BR /&gt;NOTE: There were 6 observations read from the data set WORK.GOOD_NUMBERS.&lt;BR /&gt;&lt;FONT color="#FF0000"&gt;ERROR: Argument length greater than length of key variable ID at line 1747 column 36.&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT color="#FF0000"&gt;ERROR: Keys and data must be specified in the same order and have the same types as given in DefineKey/DefineData at line 1747 column 36.&lt;/FONT&gt;&lt;BR /&gt;ERROR: DATA STEP Component Object failure. Aborted during the EXECUTION phase.&lt;BR /&gt;NOTE: The SAS System stopped processing this step because of errors.&lt;BR /&gt;NOTE: There were 1 observations read from the data set WORK.MARKET_DB.&lt;BR /&gt;WARNING: The data set WORK.WANT may be incomplete. When this step was stopped there were 0 observations and 7 variables.&lt;BR /&gt;WARNING: Data set WORK.WANT was not replaced because this step was stopped.&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jul 2020 21:42:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669728#M200939</guid>
      <dc:creator>vicky07</dc:creator>
      <dc:date>2020-07-15T21:42:51Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669735#M200942</link>
      <description>Follow &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;'s advice #1 and my #2.</description>
      <pubDate>Wed, 15 Jul 2020 21:59:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669735#M200942</guid>
      <dc:creator>ketpt42</dc:creator>
      <dc:date>2020-07-15T21:59:41Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669743#M200945</link>
      <description>&lt;P&gt;I fixed the errors however the output is bringing in the phone numbers that doesn't belong to the ID in the look up dataset(confirm_phn). The below red highlighted phone numbers shouldn't be part of the output,&amp;nbsp; it's doing the&amp;nbsp; exclusion at ID level if there is no match but not removing the phone numbers for the matching ID if it doesn't belong in the "confirm_phn" table.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;TABLE width="476"&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD width="47px"&gt;ID&lt;/TD&gt;
&lt;TD width="83px"&gt;ID2&lt;/TD&gt;
&lt;TD width="100px"&gt;py1&lt;/TD&gt;
&lt;TD width="100px"&gt;py2&lt;/TD&gt;
&lt;TD width="100px"&gt;py3&lt;/TD&gt;
&lt;TD width="100px"&gt;py4&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD width="47px"&gt;1325&lt;/TD&gt;
&lt;TD width="83px"&gt;56782322&lt;/TD&gt;
&lt;TD width="100px"&gt;5872584458&lt;/TD&gt;
&lt;TD width="100px"&gt;&lt;FONT color="#FF0000"&gt;2478569877&lt;/FONT&gt;&lt;/TD&gt;
&lt;TD width="100px"&gt;&lt;FONT color="#FF0000"&gt;8987786548&lt;/FONT&gt;&lt;/TD&gt;
&lt;TD width="100px"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD width="47px"&gt;5489&lt;/TD&gt;
&lt;TD width="83px"&gt;58654851&lt;/TD&gt;
&lt;TD width="100px"&gt;&lt;FONT color="#FF0000"&gt;8658656767&lt;/FONT&gt;&lt;/TD&gt;
&lt;TD width="100px"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD width="100px"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;TD width="100px"&gt;2795003597&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD width="47px"&gt;8799&lt;/TD&gt;
&lt;TD width="83px"&gt;89745434&lt;/TD&gt;
&lt;TD width="100px"&gt;4897568698&lt;/TD&gt;
&lt;TD width="100px"&gt;3697489567&lt;/TD&gt;
&lt;TD width="100px"&gt;&lt;FONT color="#FF0000"&gt;3265987465&lt;/FONT&gt;&lt;/TD&gt;
&lt;TD width="100px"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;data want;&lt;BR /&gt;if 0 then set good_numbers;&lt;/P&gt;
&lt;P&gt;if _N_=1 then do;&lt;BR /&gt;dcl hash H(dataset:'good_numbers');&lt;BR /&gt;H.definekey('ID','confirm_phn');&lt;BR /&gt;H.definedata('confirm_phn');&lt;BR /&gt;H.definedone();&lt;/P&gt;
&lt;P&gt;end;&lt;BR /&gt;set market_db;&lt;BR /&gt;array phn py1-py4;&lt;BR /&gt;do over phn;&lt;BR /&gt;if H.find(key:ID, key:phn) = 0 or h.find(key:id2, key:phn) = 0 then do;&lt;BR /&gt;output;&lt;BR /&gt;leave; &lt;BR /&gt;end;&lt;BR /&gt;end;&lt;BR /&gt;call missing(confirm_phn);&lt;BR /&gt;drop confirm_phn;&lt;BR /&gt;run;&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jul 2020 22:46:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669743#M200945</guid>
      <dc:creator>vicky07</dc:creator>
      <dc:date>2020-07-15T22:46:39Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669757#M200949</link>
      <description>So if your number is not found, set it to missing. You'll need to take out the leave statement and change the output so that it only happens on a line where at least one match was made.</description>
      <pubDate>Thu, 16 Jul 2020 00:25:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669757#M200949</guid>
      <dc:creator>ketpt42</dc:creator>
      <dc:date>2020-07-16T00:25:46Z</dc:date>
    </item>
    <item>
      <title>Re: Inner join using hash</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669835#M200993</link>
      <description>&lt;P&gt;Make your life easier by using an intelligent data structure:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data market_db;
input @1 ID :$4. @6 ID2 :$8. @15 py1 $10. @26 py2 $10. @37 py3 $10. @48 py4 $10.;
infile cards missover ;
cards;
1325 56782322 5872584458 2478569877 8987786548
5489 58654851 8658656767                       2795003597  
3287 29365474 9831765765 5458456844           
8799 89745434 4897568698 3697489567 3265987465
;

data good_numbers;
input ID :$8. confirm_phn :$10.;
infile cards;
cards;
56782322 5872584458
5489     2795003597
89745434 4897568698
2587     5872584458
9876     9831765765
8799     3697489567
;

proc transpose
  data=market_db
  out=long (
    rename=(col1=phn)
    where=(phn ne "")
  )
;
by id notsorted id2;
var py:;
run;

data market_db_intelligent;
set long;
py_type = substr(_name_,3);
drop _name_;
run;

data want;
set market_db_intelligent;
if _n_ = 1
then do;
  length cid $8;
  declare hash good (dataset:"good_numbers (rename=(id=cid confirm_phn=phn))");
  good.definekey('cid','phn');
  good.definedone();
end;
if good.check(key:id2,key:phn) = 0 or good.check(key:id,key:phn) = 0;
drop cid;
run;
  &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;You can see how transposing to a long dataset layout makes coding simple.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jul 2020 08:42:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Inner-join-using-hash/m-p/669835#M200993</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2020-07-16T08:42:09Z</dc:date>
    </item>
  </channel>
</rss>

