<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Why Proc SQL left join output duplicates values? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416885#M102375</link>
    <description>&lt;P&gt;Because there are duplicates in your Q table.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data p;
	input 
	org submit product report uniquepone:$1. uniqueptwo:$1.;
datalines;
1 2 3 4 A B
2 2 2 2 C D
3 3 4 6 E F
;
run;

data q;
	input 
	org submit product report uniqueqone:$1. uniqueqtwo:$1.;
datalines;
1 2 3 4 G H
1 2 3 4 I J
2 2 2 2 K L
3 3 4 6 M N
3 3 4 6 O P
4 4 4 4 Q R
4 5 5 5 S T
;
run;


proc sql; 
	select p.org, p.submit, p.product, p.report, uniquepone, uniqueqtwo 
	from p left join q
	on p.org = q.org and p.submit = q.submit and p.product = q.product and p.report = q.report; 
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;You don't get three records.&amp;nbsp; You get five.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;                                       The SAS System     14:45 Tuesday, November 28, 2017   4

                      org    submit   product    report  uniquepone  uniqueqtwo
 
                        1         2         3         4  A           H
                        1         2         3         4  A           J
                        2         2         2         2  C           L
                        3         3         4         6  E           P
                        3         3         4         6  E           N


&lt;/PRE&gt;</description>
    <pubDate>Tue, 28 Nov 2017 21:05:44 GMT</pubDate>
    <dc:creator>HB</dc:creator>
    <dc:date>2017-11-28T21:05:44Z</dc:date>
    <item>
      <title>Why Proc SQL left join output duplicates values?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416864#M102369</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hi everyone, I found it's wired that my SQL left join output duplicates values, seems the Cartesian product was not cleaned/selected by SAS. My left join didn't result in the same number of rows as in my left table. I run a sort with nodupkep option and cleaned up the duplicates by hand, but I am very curious what happened.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are the proc contents for my two table:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="1.png" style="width: 595px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16889i9AC7C2C9CA370C8B/image-size/large?v=v2&amp;amp;px=999" role="button" title="1.png" alt="1.png" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="2.png" style="width: 448px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16892iDDC9DE8FDBE07F2F/image-size/large?v=v2&amp;amp;px=999" role="button" title="2.png" alt="2.png" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="3.png" style="width: 576px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16890iFB734CDA2AC66135/image-size/large?v=v2&amp;amp;px=999" role="button" title="3.png" alt="3.png" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="4.png" style="width: 487px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16891i1EF9301673E26CDE/image-size/large?v=v2&amp;amp;px=999" role="button" title="4.png" alt="4.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are my SQL Left Join code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table MergedHDF as
select coalesce(p.ProductYear,q.ProductYear)as ProductYear,
       p.MeasurementYear,
       coalesce(p.SubmissionId, q.SubmissionId)as SubmissionId,
       coalesce(p.OrganizationId,q.OrganizationId)as OrganizationId,
       coalesce(p.OrganizationName,q.OrganizationName) as OrganizationName,
       coalesce(p.ProductLine, q.ProductLine) as ProductLine,
       coalesce(p.ReportingProduct, q.ReportingProduct) as ReportingProduct,
       coalesce(p.MeasureCode,q.MeasureCode) as MeasureCode,
       p.MeasureName, p.IndicatorName, p.IndicatorCode,
	   p.Rate, p.Numerator, p.Denominator, p.EligiblePopulation, 
       p.Type, p.Region, p.sum_num, p.sum_denom,p.AggregateRate,
       coalesce(p.Status,q.Status) as Status,q.states 
from averagedrate as p
left join
combinedstates as q
on  p.organizationid=q.organizationid and p.SubmissionId=q.submissionId and 
    p.ProductLine=q.ProductLine and p.ReportingProduct=q.ReportingProduct;
quit;

proc sort data=mergedHDF nodupkey;
by measurecode productyear organizationid submissionid productline reportingproduct status;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here are my Log:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="5.png" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16894i61B756AD4C865A3F/image-size/large?v=v2&amp;amp;px=999" role="button" title="5.png" alt="5.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Note that there are 5426 rows, rather than 2816 rows as in my left table.&lt;/P&gt;&lt;P&gt;Here are the log after I remove duplicates:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="6.png" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/16895iE01921BDFA687691/image-size/large?v=v2&amp;amp;px=999" role="button" title="6.png" alt="6.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This really confuses me, any idea on why this happened will be greatly appreciated!&lt;/P&gt;</description>
      <pubDate>Tue, 28 Nov 2017 20:37:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416864#M102369</guid>
      <dc:creator>LisaYIN9309</dc:creator>
      <dc:date>2017-11-28T20:37:02Z</dc:date>
    </item>
    <item>
      <title>Re: Why Proc SQL left join output duplicates values?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416885#M102375</link>
      <description>&lt;P&gt;Because there are duplicates in your Q table.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data p;
	input 
	org submit product report uniquepone:$1. uniqueptwo:$1.;
datalines;
1 2 3 4 A B
2 2 2 2 C D
3 3 4 6 E F
;
run;

data q;
	input 
	org submit product report uniqueqone:$1. uniqueqtwo:$1.;
datalines;
1 2 3 4 G H
1 2 3 4 I J
2 2 2 2 K L
3 3 4 6 M N
3 3 4 6 O P
4 4 4 4 Q R
4 5 5 5 S T
;
run;


proc sql; 
	select p.org, p.submit, p.product, p.report, uniquepone, uniqueqtwo 
	from p left join q
	on p.org = q.org and p.submit = q.submit and p.product = q.product and p.report = q.report; 
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;You don't get three records.&amp;nbsp; You get five.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;                                       The SAS System     14:45 Tuesday, November 28, 2017   4

                      org    submit   product    report  uniquepone  uniqueqtwo
 
                        1         2         3         4  A           H
                        1         2         3         4  A           J
                        2         2         2         2  C           L
                        3         3         4         6  E           P
                        3         3         4         6  E           N


&lt;/PRE&gt;</description>
      <pubDate>Tue, 28 Nov 2017 21:05:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416885#M102375</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2017-11-28T21:05:44Z</dc:date>
    </item>
    <item>
      <title>Re: Why Proc SQL left join output duplicates values?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416895#M102381</link>
      <description>&lt;P&gt;Aha! I see, thank you &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/6401"&gt;@HB&lt;/a&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Nov 2017 21:22:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Why-Proc-SQL-left-join-output-duplicates-values/m-p/416895#M102381</guid>
      <dc:creator>LisaYIN9309</dc:creator>
      <dc:date>2017-11-28T21:22:21Z</dc:date>
    </item>
  </channel>
</rss>

