<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Hash join to replace SQL Left Join in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103632#M291304</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I agree to the SPDS point, however I need to know how much of a difference it could make if I could use&amp;nbsp; a data step hash to the above step . I`m certain data vloume cannot be controlled much, but at least run times can be improved.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Thu, 13 Jun 2013 05:51:58 GMT</pubDate>
    <dc:creator>tommy81</dc:creator>
    <dc:date>2013-06-13T05:51:58Z</dc:date>
    <item>
      <title>Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103627#M291299</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I`m looking for an equivalent of the below code in Data step HASH join.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;proc sql;&lt;/P&gt;&lt;P&gt;create table rllp2 as &lt;/P&gt;&lt;P&gt;select distinct a.*, b.enam as name,b.position,b.orid,b.ccid_1 as ccid,b.region1 as region&lt;/P&gt;&lt;P&gt;max(a.BEGD,b.BEGD) as BEGD,min(a.ENDD,b.ENDD) as ENDD&lt;/P&gt;&lt;P&gt;from rllp3 as a&lt;/P&gt;&lt;P&gt;left join &lt;/P&gt;&lt;P&gt;rllp1 as b&lt;/P&gt;&lt;P&gt;on a.position_id_k = b.position_id_1 &lt;/P&gt;&lt;P&gt;and ((a.BEGD BETWEEN b.BEGD and&amp;nbsp; b.ENDD)&lt;/P&gt;&lt;P&gt;or (a.ENDD BETWEEN b.BEGD and&amp;nbsp; b.ENDD) or (a.BEGD &amp;gt;= b.BEGD &lt;BR /&gt;and a.ENDD &amp;lt;= b.ENDD) or (a.BEGD &amp;lt;= b.BEGD and a.ENDD &amp;gt;= b.ENDD))&lt;/P&gt;&lt;P&gt;order by position,BEGD,ENDD;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Not sure how to add the distinct ,extra join conditions and order by in a single datas tep.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;DATA MI ;&lt;/P&gt;&lt;P&gt;IF 0 THEN SET rllp1; &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; DECLARE hash VS(hashexp:10, dataset:"rllp1");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; VS.definekey('position_id_k,'position_id_1');&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; VS.definedata(all:'Y');&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; VS.definedone();&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; DO UNTIL(eof);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; SET rllp3end=eof;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; IF VS.find()=0 THEN OUTPUT;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ELSE DO;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; CALL missing( a1---b1);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; OUTPUT;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; END;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; END;&lt;/P&gt;&lt;P&gt;STOP ;&lt;/P&gt;&lt;P&gt;RUN;&lt;/P&gt;&lt;P&gt;Kindly suggest some methods &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Jun 2013 05:34:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103627#M291299</guid>
      <dc:creator>tommy81</dc:creator>
      <dc:date>2013-06-12T05:34:47Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103628#M291300</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;i don't think I can help about hashing, but maybe you could tell us about why? Is it a performance issue regarding the SQL?&lt;/P&gt;&lt;P&gt;It's quite hard for SAS SQL to optimize OR conditions. Storing in SPDE might improve performance (or SPD Server if available), and indexing on at least position_id_1 and&amp;nbsp; position_id_k.&lt;/P&gt;&lt;P&gt;Then I'm not sure how distinct works with max and min functions. I am more comfortable using Group By, but distinct may work fine for you...&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Jun 2013 11:50:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103628#M291300</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2013-06-12T11:50:35Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103629#M291301</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Can you rewrite your clause to say&lt;/P&gt;&lt;P&gt;left join &lt;/P&gt;&lt;P&gt;rllp1 as b&lt;/P&gt;&lt;P&gt;on a.position_id_k = b.position_id_1 &lt;/P&gt;&lt;P&gt;and a.BEGD &amp;lt;= b.ENDD&lt;/P&gt;&lt;P&gt;and a.ENDD &amp;gt;= b.BEGD&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I didn't work through all of the permutations but that is what I see normally when trying to make sure you count everything that may have happened during a date span.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Jun 2013 12:09:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103629#M291301</guid>
      <dc:creator>DBailey</dc:creator>
      <dc:date>2013-06-12T12:09:27Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103630#M291302</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;&lt;BR /&gt;Hi LinusH,&lt;/P&gt;&lt;P&gt;Its about performance and time. The datasets are 100Gb in size.That why i though its better to deploy data step hash.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Jun 2013 12:38:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103630#M291302</guid>
      <dc:creator>tommy81</dc:creator>
      <dc:date>2013-06-12T12:38:37Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103631#M291303</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;tommy81,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;When your data set gets into 30GB+ , that's when you move it into SPDE/SPDS!!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;After all, that's why SPDE was developed in the first place. it was the "free" solution to handle large data sets using your existing BASE SAS. I know BASE SAS is not Free, but you don't have to purchase and configure SPDS to handle such volumes.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I would strongly recommend investigating SPDE as storage engine for your data set. It will payoff down the line.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ahmed&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 12 Jun 2013 13:12:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103631#M291303</guid>
      <dc:creator>AhmedAl_Attar</dc:creator>
      <dc:date>2013-06-12T13:12:26Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103632#M291304</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I agree to the SPDS point, however I need to know how much of a difference it could make if I could use&amp;nbsp; a data step hash to the above step . I`m certain data vloume cannot be controlled much, but at least run times can be improved.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 13 Jun 2013 05:51:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103632#M291304</guid>
      <dc:creator>tommy81</dc:creator>
      <dc:date>2013-06-13T05:51:58Z</dc:date>
    </item>
    <item>
      <title>Re: Hash join to replace SQL Left Join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103633#M291305</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I think it will very complicated to do both the join logic and distinct/group by functionality within one single data step.&lt;/P&gt;&lt;P&gt;So my suggestion is to first try to optimize your SQL query first to see what gains is possible, t sum up:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;simplify the SQL suggested by DBailey&lt;/LI&gt;&lt;LI&gt;Index on the join column(s)&lt;/LI&gt;&lt;LI&gt;move the data to SPDE&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Since SQL operations such as summarizing and sorting is done in paralel, investigate if you can exploit the hardware (or have any other HW available). This includes setting up MEMSIZE/SORTSIZE options.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 13 Jun 2013 09:02:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Hash-join-to-replace-SQL-Left-Join/m-p/103633#M291305</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2013-06-13T09:02:33Z</dc:date>
    </item>
  </channel>
</rss>

