<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Join and remove duplicates in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648431#M194230</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/267482"&gt;@ifti_ch2002&lt;/a&gt;&amp;nbsp; The effort -"&lt;EM&gt;Thanks, I spent hours on Join"&amp;nbsp;&lt;/EM&gt;is not a bad idea nor a&amp;nbsp;wrong approach. The intricate details to pay attention to in my humble opinion is the risk of a cartesian join triggered by an inequality operator. However, in your case that concern doesn't surface for the reason, the combination of an equality and inequality operator makes it rather straight forward look-up as the SQL optimizer determines the best join algorithm principally on the equality operator.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp; aka &lt;EM&gt;Mr Perfect&lt;/EM&gt; eloquently&amp;nbsp;writes the distinction between JOINS,Subqueries going above and beyond from a performance standpoint and similar practical usage have often been noticed in posts by&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/462"&gt;@PGStats&lt;/a&gt;&amp;nbsp;aka &lt;EM&gt;Pierre with no peers&lt;/EM&gt;&amp;nbsp;who is above all. Please do peek into their posts&amp;nbsp;when you have the time in&amp;nbsp;&lt;STRONG&gt;grasping&lt;/STRONG&gt; the concepts.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind Regards!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;

data have1;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   format date mmddyy10.;
   datalines;  
1 4-7-2003 
1 4-7-2009 
1 4-7-2010 
1 4-7-2011 
1 4-7-2012 
1 4-7-2013 
1 4-7-2014 
1 4-7-2015 
1 4-7-2016 
1 4-7-2018 
2 4-7-2005 
2 4-7-2011 
2 4-7-2012 
2 4-7-2013 
2 4-7-2014 
2 4-7-2015 
2 4-7-2016 
2 4-7-2017 
2 4-7-2018 
2 4-7-2019 
2 4-7-2020 
;       
run; 

 

data have2;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
     format date mmddyy10.;
   datalines;  
1 4-7-2003 
2 4-7-2005 
;       
run; 

proc sql;
create table  want as
select a.*
from have1 a inner join have2 b
on a.IDnumber=b.IDnumber
and a.date ne b.date
order by a.IDnumber, a.date;
quit;

proc print noobs;run;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 17 May 2020 20:02:10 GMT</pubDate>
    <dc:creator>novinosrin</dc:creator>
    <dc:date>2020-05-17T20:02:10Z</dc:date>
    <item>
      <title>Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648403#M194214</link>
      <description>&lt;P&gt;I have following data&lt;/P&gt;&lt;PRE&gt;data have1;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   datalines;  
1 4-7-2003 
1 4-7-2009 
1 4-7-2010 
1 4-7-2011 
1 4-7-2012 
1 4-7-2013 
1 4-7-2014 
1 4-7-2015 
1 4-7-2016 
1 4-7-2018 
2 4-7-2005 
2 4-7-2011 
2 4-7-2012 
2 4-7-2013 
2 4-7-2014 
2 4-7-2015 
2 4-7-2016 
2 4-7-2017 
2 4-7-2018 
2 4-7-2019 
2 4-7-2020 
;       
run; &lt;BR /&gt;&lt;BR /&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;And&lt;/P&gt;&lt;PRE&gt;data have2;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   datalines;  
1 4-7-2003 
2 4-7-2005 
;       
run; &lt;/PRE&gt;&lt;P&gt;I need the ouput as following&lt;/P&gt;&lt;PRE&gt;data want;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   datalines;  
1 4-7-2009 
1 4-7-2010 
1 4-7-2011 
1 4-7-2012 
1 4-7-2013 
1 4-7-2014 
1 4-7-2015 
1 4-7-2016 
1 4-7-2018 
2 4-7-2011 
2 4-7-2012 
2 4-7-2013 
2 4-7-2014 
2 4-7-2015 
2 4-7-2016 
2 4-7-2017 
2 4-7-2018 
2 4-7-2019 
2 4-7-2020 
;       
run; &lt;/PRE&gt;&lt;P&gt;I need to check by id number, if ID matches in both tables, and if the data in have1 has not exact match in have 2.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 17 May 2020 15:07:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648403#M194214</guid>
      <dc:creator>ifti_ch2002</dc:creator>
      <dc:date>2020-05-17T15:07:42Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648404#M194215</link>
      <description>&lt;P&gt;Hi &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/267482"&gt;@ifti_ch2002&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Please try this to remove observations of have2 from have1:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
	merge have1 (in=x) have2 (in=y);
	by IDnumber date;
	if x and not y;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Best,&lt;/P&gt;</description>
      <pubDate>Sun, 17 May 2020 15:25:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648404#M194215</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-17T15:25:51Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648405#M194216</link>
      <description>&lt;P&gt;Another option could be the use of a SET operator in PROC SQL:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
	create table want as
	select IDnumber, date from have1
	except
	select IDnumber, date from have2;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;or a hash:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
	if _n_=1 then do;
		declare hash h (dataset:'have2');
		h.definekey('IDnumber', 'date');
		h.definedata('IDnumber', 'date');
		h.definedone();
	end;
	set have1;
	if h.find() ne 0 then output;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Best,&lt;/P&gt;</description>
      <pubDate>Sun, 17 May 2020 15:17:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648405#M194216</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-17T15:17:35Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648407#M194217</link>
      <description>&lt;P&gt;Thanks, I spent hours on Join.&lt;/P&gt;</description>
      <pubDate>Sun, 17 May 2020 15:35:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648407#M194217</guid>
      <dc:creator>ifti_ch2002</dc:creator>
      <dc:date>2020-05-17T15:35:49Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648408#M194218</link>
      <description>You're welcome &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/267482"&gt;@ifti_ch2002&lt;/a&gt;</description>
      <pubDate>Sun, 17 May 2020 15:36:56 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648408#M194218</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-17T15:36:56Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648431#M194230</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/267482"&gt;@ifti_ch2002&lt;/a&gt;&amp;nbsp; The effort -"&lt;EM&gt;Thanks, I spent hours on Join"&amp;nbsp;&lt;/EM&gt;is not a bad idea nor a&amp;nbsp;wrong approach. The intricate details to pay attention to in my humble opinion is the risk of a cartesian join triggered by an inequality operator. However, in your case that concern doesn't surface for the reason, the combination of an equality and inequality operator makes it rather straight forward look-up as the SQL optimizer determines the best join algorithm principally on the equality operator.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp; aka &lt;EM&gt;Mr Perfect&lt;/EM&gt; eloquently&amp;nbsp;writes the distinction between JOINS,Subqueries going above and beyond from a performance standpoint and similar practical usage have often been noticed in posts by&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/462"&gt;@PGStats&lt;/a&gt;&amp;nbsp;aka &lt;EM&gt;Pierre with no peers&lt;/EM&gt;&amp;nbsp;who is above all. Please do peek into their posts&amp;nbsp;when you have the time in&amp;nbsp;&lt;STRONG&gt;grasping&lt;/STRONG&gt; the concepts.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind Regards!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;

data have1;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   format date mmddyy10.;
   datalines;  
1 4-7-2003 
1 4-7-2009 
1 4-7-2010 
1 4-7-2011 
1 4-7-2012 
1 4-7-2013 
1 4-7-2014 
1 4-7-2015 
1 4-7-2016 
1 4-7-2018 
2 4-7-2005 
2 4-7-2011 
2 4-7-2012 
2 4-7-2013 
2 4-7-2014 
2 4-7-2015 
2 4-7-2016 
2 4-7-2017 
2 4-7-2018 
2 4-7-2019 
2 4-7-2020 
;       
run; 

 

data have2;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
     format date mmddyy10.;
   datalines;  
1 4-7-2003 
2 4-7-2005 
;       
run; 

proc sql;
create table  want as
select a.*
from have1 a inner join have2 b
on a.IDnumber=b.IDnumber
and a.date ne b.date
order by a.IDnumber, a.date;
quit;

proc print noobs;run;

&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 17 May 2020 20:02:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648431#M194230</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2020-05-17T20:02:10Z</dc:date>
    </item>
    <item>
      <title>Re: Join and remove duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648524#M194293</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
data have1;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
   format date mmddyy10.;
   datalines;  
1 4-7-2003 
1 4-7-2009 
1 4-7-2010 
1 4-7-2011 
1 4-7-2012 
1 4-7-2013 
1 4-7-2014 
1 4-7-2015 
1 4-7-2016 
1 4-7-2018 
2 4-7-2005 
2 4-7-2011 
2 4-7-2012 
2 4-7-2013 
2 4-7-2014 
2 4-7-2015 
2 4-7-2016 
2 4-7-2017 
2 4-7-2018 
2 4-7-2019 
2 4-7-2020 
;       
run; 

 

data have2;
infile datalines missover; 
   input IDnumber @3 date  mmddyy10.;
     format date mmddyy10.;
   datalines;  
1 4-7-2003 
2 4-7-2005 
;       
run; 

proc sql;
create table  want as
select * from have1 
except
select * from have2;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 18 May 2020 11:54:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Join-and-remove-duplicates/m-p/648524#M194293</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2020-05-18T11:54:12Z</dc:date>
    </item>
  </channel>
</rss>

