<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Merge vs sql full join in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891063#M352073</link>
    <description>&lt;P&gt;ok, I see the problem.&amp;nbsp; &amp;nbsp;It looks like I'm getting duplicate bene_sk values whenever there is a match on the first and third data sets.&amp;nbsp; &amp;nbsp;I've printed the first several rows of the duplicate bene_sk cases below.&amp;nbsp; I can't print that variable in this forum, but each two rows has same value and they alternate between having a value in lomajor or upmajor.&amp;nbsp; &amp;nbsp;Is there a place I can insert a statement of "on a.bene_sk=c.bene_sk"?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;84 proc sql;&lt;BR /&gt;2 The SAS System 09:42 Friday, August 25, 2023&lt;/P&gt;&lt;P&gt;85 create table d as&lt;BR /&gt;86 select *&lt;BR /&gt;87 from tbl2&lt;BR /&gt;88 group by bene_sk&lt;BR /&gt;89 having count(*) gt 1;&lt;BR /&gt;NOTE: The query requires remerging summary statistics back with the original data.&lt;BR /&gt;NOTE: Table WORK.D created, with 58 rows and 4 columns.&lt;/P&gt;&lt;P&gt;90 quit;&lt;BR /&gt;NOTE: PROCEDURE SQL used (Total process time):&lt;BR /&gt;real time 0.03 seconds&lt;BR /&gt;cpu time 0.01 seconds&lt;/P&gt;&lt;P&gt;91&lt;BR /&gt;92 data _null_;&lt;BR /&gt;93 set d;&lt;BR /&gt;94 put lomajor lominor upmajor;&lt;BR /&gt;95 run;&lt;/P&gt;&lt;P&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;/P&gt;</description>
    <pubDate>Fri, 25 Aug 2023 16:34:14 GMT</pubDate>
    <dc:creator>Batman</dc:creator>
    <dc:date>2023-08-25T16:34:14Z</dc:date>
    <item>
      <title>Merge vs sql full join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891037#M352067</link>
      <description>&lt;P&gt;I'm trying to compare results from a data step merge and full join in sql.&amp;nbsp; &amp;nbsp;I get a higher number of output obs using sql and I'm not sure why.&amp;nbsp; &amp;nbsp; It isn't a problem of duplicates, there is just row for each value of "bene_sk" in each input file.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;83 /*Method 1: merge with data step*/&lt;BR /&gt;84 data tbl1;&lt;BR /&gt;85 merge lomajor16 lominor16 upmajor16;&lt;BR /&gt;86 by bene_sk;&lt;BR /&gt;87 run;&lt;/P&gt;&lt;P&gt;NOTE: There were 29625 observations read from the data set WORK.LOMAJOR16.&lt;BR /&gt;NOTE: There were 36241 observations read from the data set WORK.LOMINOR16.&lt;BR /&gt;NOTE: There were 396 observations read from the data set WORK.UPMAJOR16.&lt;BR /&gt;NOTE: The data set WORK.TBL1 has 60904 observations and 4 variables.&lt;BR /&gt;NOTE: DATA statement used (Total process time):&lt;BR /&gt;real time 1.39 seconds&lt;BR /&gt;cpu time 0.16 seconds&lt;/P&gt;&lt;P&gt;88&lt;BR /&gt;89 /*Method 2: merge with sql*/&lt;BR /&gt;90 proc sql;&lt;BR /&gt;91 create table tbl2 as&lt;BR /&gt;92 select coalesce(a.bene_sk, b.bene_sk, c.bene_sk) as bene_sk ,a.lomajor, b.lominor, c.upmajor&lt;BR /&gt;93 from lomajor16 as a&lt;BR /&gt;94 full join lominor16 as b&lt;BR /&gt;95 on a.bene_sk=b.bene_sk&lt;BR /&gt;96 full join upmajor16 as c&lt;BR /&gt;97 on b.bene_sk=c.bene_sk&lt;BR /&gt;98 order by bene_sk&lt;BR /&gt;99 ;&lt;BR /&gt;NOTE: SAS threaded sort was used.&lt;BR /&gt;NOTE: Table WORK.TBL2 created, with 60933 rows and 4 columns.&lt;/P&gt;&lt;P&gt;100 quit;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2023 15:40:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891037#M352067</guid>
      <dc:creator>Batman</dc:creator>
      <dc:date>2023-08-25T15:40:09Z</dc:date>
    </item>
    <item>
      <title>Re: Merge vs sql full join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891041#M352070</link>
      <description>&lt;P&gt;MERGE and FULL JOIN will not produce the same output in all cases, and since we don't have your data, it's hard to be more specific.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2023 15:45:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891041#M352070</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2023-08-25T15:45:51Z</dc:date>
    </item>
    <item>
      <title>Re: Merge vs sql full join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891045#M352071</link>
      <description>&lt;P&gt;Perhaps looking at a much smaller example will allow tracing what happens. The crux is that the joins are sort of sequential, one join then joined on another.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data one;
  input a x;
datalines;
1 1
2 2
3 3
;

data two;
  input a y;
datalines;
1 11
3 33
;

data three;
  input a z;
datalines;
2  222
3  333
;


data merged;
   merge one two three;
   by a;
run;

proc sql;
   create table joined as
 select coalesce(a.a, b.a, c.a) as a ,a.x, b.y, c.z
 from one as a
 full join two as b
 on a.a=b.a
 full join three as c
 on b.a=c.a
 order by a
 ;
quit;&lt;/PRE&gt;
&lt;P&gt;Assuming no duplicate values of the BY variable(s) the Merge will result in the number of observations equal to the number of by variable combinations because of the way the other variables on the merged sets behave with matching By values.&lt;/P&gt;
&lt;P&gt;Note that the other variables don't behave the same with JOIN, so you get uncombined versions of the observations.&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2023 15:56:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891045#M352071</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2023-08-25T15:56:06Z</dc:date>
    </item>
    <item>
      <title>Re: Merge vs sql full join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891063#M352073</link>
      <description>&lt;P&gt;ok, I see the problem.&amp;nbsp; &amp;nbsp;It looks like I'm getting duplicate bene_sk values whenever there is a match on the first and third data sets.&amp;nbsp; &amp;nbsp;I've printed the first several rows of the duplicate bene_sk cases below.&amp;nbsp; I can't print that variable in this forum, but each two rows has same value and they alternate between having a value in lomajor or upmajor.&amp;nbsp; &amp;nbsp;Is there a place I can insert a statement of "on a.bene_sk=c.bene_sk"?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;84 proc sql;&lt;BR /&gt;2 The SAS System 09:42 Friday, August 25, 2023&lt;/P&gt;&lt;P&gt;85 create table d as&lt;BR /&gt;86 select *&lt;BR /&gt;87 from tbl2&lt;BR /&gt;88 group by bene_sk&lt;BR /&gt;89 having count(*) gt 1;&lt;BR /&gt;NOTE: The query requires remerging summary statistics back with the original data.&lt;BR /&gt;NOTE: Table WORK.D created, with 58 rows and 4 columns.&lt;/P&gt;&lt;P&gt;90 quit;&lt;BR /&gt;NOTE: PROCEDURE SQL used (Total process time):&lt;BR /&gt;real time 0.03 seconds&lt;BR /&gt;cpu time 0.01 seconds&lt;/P&gt;&lt;P&gt;91&lt;BR /&gt;92 data _null_;&lt;BR /&gt;93 set d;&lt;BR /&gt;94 put lomajor lominor upmajor;&lt;BR /&gt;95 run;&lt;/P&gt;&lt;P&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;BR /&gt;. . 1&lt;BR /&gt;1 . .&lt;/P&gt;</description>
      <pubDate>Fri, 25 Aug 2023 16:34:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891063#M352073</guid>
      <dc:creator>Batman</dc:creator>
      <dc:date>2023-08-25T16:34:14Z</dc:date>
    </item>
    <item>
      <title>Re: Merge vs sql full join</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891073#M352074</link>
      <description>&lt;P&gt;Your SQL code looks wrong (it is certainly confusing).&amp;nbsp; What does it even mean to full join in a chain like that?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Let's make some example data.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data one;
  input id name1 $30.;
  s1=1;
cards;
1 all three
2 one only
3 one and two
4 one and two and three
;
data two;
  input id name2 $30.;
  s2=1;
cards;
1 all three
3 one and two
5 two only
;
data three;
  input id name3 $30.;
  s3=1;
cards;
1 all three
4 one and two and three
6 three only
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;So when I merge these&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data ds_merge;
  merge one two three;
  by id;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;there are 6 observations.&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Tom_0-1692982003147.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/87102i1D5343E4E49EDC67/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Tom_0-1692982003147.png" alt="Tom_0-1692982003147.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;If I tell SQL to combine two of them and combine the resulting set with the third&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;create table sql_join as 
select * 
  from three natural full join (select * from two natural full join one) x 
  order by 1,2
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I get the same 6 observations&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Tom_1-1692982107158.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/87103i76B702A3019109CF/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Tom_1-1692982107158.png" alt="Tom_1-1692982107158.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;But if I leave it up to the PROC to try to combine three datasets will chained full joins&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;create table sql_join2 as 
select * 
  from two natural full join three natural full join one 
  order by 1,2
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I get something different&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Tom_2-1692982195736.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/87105i5A42D2402E52E0A1/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Tom_2-1692982195736.png" alt="Tom_2-1692982195736.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;To get it to work properly without the nesting the subsequent joins have to refer back to all of the previous dataset's key values, using OR condition.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql feedback;
create table sql_join as
select coalesce(one.id,two.id,three.id) as id
  ,name1,s1,name2,s2,name3,s3
  from one 
   full join two 
     on one.id=two.id
   full join three
     on three.id = one.id or three.id=two.id
  order by 1
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 25 Aug 2023 17:48:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Merge-vs-sql-full-join/m-p/891073#M352074</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2023-08-25T17:48:58Z</dc:date>
    </item>
  </channel>
</rss>

