<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Multiple SQL Joins using Hash? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463866#M118237</link>
    <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Proc sql;

create table JOIN as
select 
	a.key1, a.x1, a.x2, a.x3, 
	b.y1, b.y2, b.y3, b.y4,
	c.z1, c.z2, c.z3
	d.p1, d.p2
	e.t1, e.t2, e.t3, e.t4, 
	f.n1, f.n2, 
	g.w1, g.w2, g.w3

from DATASET1 a

	join DATASET2 b
	on b.y1  between '01aug2017'd and '31aug2017'd
	and a.key1 = b.key1
	and b.y3 = 'D'
	and b.y4 = 'P'

	join DATASET3 c
	on b.y3 = c.z1

	join DATASET4 d
	on c.key1 = d.key1
	and d.p2 = '31jul2017'd

	join DATASET5 e
	on d.key2 = e.key1

	join DATASET6 g
	on g.w1 in ('123')
	and e.t2 between '01aug2017'd and '31aug2017'd

	join DATASET7 f
	on c.key2 = f.key1

where a.x1 between '01aug2017'd and '31aug2017'd
and a.x2 = 'C'
and a.x3 in ('123');

Quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Hey everyone, I'm fairly new to SAS. I'm trying to join multiple datasets together on several different keys, and meeting certain criteria. I've posted my code and&amp;nbsp;it does exactly what I want it to do... in about 80 seconds. Some of the tables (namely table e) are HUGE. Here's my issue: I need to cut down on the process time, because I need to loop this process several thousand times. I have the macro to do it, but it would take too long. I've read online that hash tables might be the solution but I can't wrap my head around how to code it. Any help would be appreciated! Thanks. (Using SAS EG 7.1)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 21 May 2018 19:54:58 GMT</pubDate>
    <dc:creator>Jrunner1569</dc:creator>
    <dc:date>2018-05-21T19:54:58Z</dc:date>
    <item>
      <title>Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463866#M118237</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Proc sql;

create table JOIN as
select 
	a.key1, a.x1, a.x2, a.x3, 
	b.y1, b.y2, b.y3, b.y4,
	c.z1, c.z2, c.z3
	d.p1, d.p2
	e.t1, e.t2, e.t3, e.t4, 
	f.n1, f.n2, 
	g.w1, g.w2, g.w3

from DATASET1 a

	join DATASET2 b
	on b.y1  between '01aug2017'd and '31aug2017'd
	and a.key1 = b.key1
	and b.y3 = 'D'
	and b.y4 = 'P'

	join DATASET3 c
	on b.y3 = c.z1

	join DATASET4 d
	on c.key1 = d.key1
	and d.p2 = '31jul2017'd

	join DATASET5 e
	on d.key2 = e.key1

	join DATASET6 g
	on g.w1 in ('123')
	and e.t2 between '01aug2017'd and '31aug2017'd

	join DATASET7 f
	on c.key2 = f.key1

where a.x1 between '01aug2017'd and '31aug2017'd
and a.x2 = 'C'
and a.x3 in ('123');

Quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Hey everyone, I'm fairly new to SAS. I'm trying to join multiple datasets together on several different keys, and meeting certain criteria. I've posted my code and&amp;nbsp;it does exactly what I want it to do... in about 80 seconds. Some of the tables (namely table e) are HUGE. Here's my issue: I need to cut down on the process time, because I need to loop this process several thousand times. I have the macro to do it, but it would take too long. I've read online that hash tables might be the solution but I can't wrap my head around how to code it. Any help would be appreciated! Thanks. (Using SAS EG 7.1)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 21 May 2018 19:54:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463866#M118237</guid>
      <dc:creator>Jrunner1569</dc:creator>
      <dc:date>2018-05-21T19:54:58Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463925#M118254</link>
      <description>&lt;P&gt;Your problem is not the time of the join, but that you loop it 1000 times. There's a fair chance that what you do can be solved by by-group processing.&lt;/P&gt;</description>
      <pubDate>Mon, 21 May 2018 22:42:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463925#M118254</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2018-05-21T22:42:12Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463930#M118257</link>
      <description>&lt;P&gt;I agree with&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/11562"&gt;@Kurt_Bremser&lt;/a&gt;.&lt;/P&gt;
&lt;P&gt;* What varies between the 1000 runs? *&lt;/P&gt;
&lt;P&gt;If you read the same tables several times, sort them (and/or index them, depending on what is needed) so they are ready to use by the merge process.&lt;/P&gt;</description>
      <pubDate>Tue, 22 May 2018 21:28:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463930#M118257</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2018-05-22T21:28:46Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463952#M118267</link>
      <description>&lt;P&gt;Your query can be rewritten as:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Proc sql;

create table JOIN as
select 
	a.key1, a.x1, a.x2, a.x3, 
	b.y1, b.y2, b.y3, b.y4,
	c.z1, c.z2, c.z3
	d.p1, d.p2
	e.t1, e.t2, e.t3, e.t4, 
	f.n1, f.n2, 
	g.w1, g.w2, g.w3

from 
    DATASET1 a                                join
    DATASET2 b on a.key1 = b.key1 join
    DATASET3 c                                join
    DATASET4 d on c.key1 = d.key1 join
    DATASET5 e on d.key2 = e.key1 join
    DATASET7 f on c.key2 = f.key1   join
    DATASET6 g
    
where 
    a.x2 = 'C'                                 and
    b.y3 = 'D'                                 and
    b.y4 = 'P'                                 and
    c.z1 = 'D'                                 and
    a.x3 in ('123')                            and
    g.w1 in ('123')                            and
    d.p2 = '31jul2017'd                        and
    a.x1 between '01aug2017'd and '31aug2017'd and
    b.y1 between '01aug2017'd and '31aug2017'd and  
    e.t2 between '01aug2017'd and '31aug2017'd
    ;    
Quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;which means that you are actually asking for&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;(a*b) x (c*d*e*f) x g&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;where * is a join and x is a cartesian product. Are you certain that this is what you want?&lt;/P&gt;</description>
      <pubDate>Tue, 22 May 2018 02:54:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/463952#M118267</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2018-05-22T02:54:11Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464107#M118327</link>
      <description>So I took your code suggestion (added 'DATASET6 g on a.x3 = g.w1') and it runs about 30 seconds faster (great!) and produces the same result after doing a quick proc-compare. Are there further enhancements now that you know what I'm after?</description>
      <pubDate>Tue, 22 May 2018 15:37:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464107#M118327</guid>
      <dc:creator>Jrunner1569</dc:creator>
      <dc:date>2018-05-22T15:37:10Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464110#M118329</link>
      <description>So essentially these tables are several billions of observations. I start with data from table A, but need the data from table E to be joined to it. E's data can only be gotten from a key field in table D. D's key field can only be gotten by matching on a key field in C....so on until I reach table A.</description>
      <pubDate>Tue, 22 May 2018 15:40:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464110#M118329</guid>
      <dc:creator>Jrunner1569</dc:creator>
      <dc:date>2018-05-22T15:40:52Z</dc:date>
    </item>
    <item>
      <title>Re: Multiple SQL Joins using Hash?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464139#M118335</link>
      <description>&lt;P&gt;Without knowing the details of your data structures, at this point, your best bet would b to include indexes on join keys and on where condition variables, as &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp;suggested.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;More importantly, you should reconsider your need to execute this query thousands of times. There is most likely a more efficient way.&lt;/P&gt;</description>
      <pubDate>Tue, 22 May 2018 18:08:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Multiple-SQL-Joins-using-Hash/m-p/464139#M118335</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2018-05-22T18:08:44Z</dc:date>
    </item>
  </channel>
</rss>

