<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Deleting duplicates based on multiple criteria in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377284#M90580</link>
    <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/77163"&gt;@Oligolas&lt;/a&gt; wrote:&lt;BR /&gt;
&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;with 5 billions rows, hash may be a much faster than my SQL:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Only if you have enough memory available:&lt;/P&gt;
&lt;P&gt;assuming that 10 % of observations have status = 'nu', you get 26 * 500,000,000 ~ 12.4 Gbytes raw size for the hash object, without the hash tree itself.&lt;/P&gt;
&lt;P&gt;Operations on datasets, OTOH, scale until you run out of disk space, which comes much later.&lt;/P&gt;</description>
    <pubDate>Wed, 19 Jul 2017 12:36:48 GMT</pubDate>
    <dc:creator>Kurt_Bremser</dc:creator>
    <dc:date>2017-07-19T12:36:48Z</dc:date>
    <item>
      <title>Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377204#M90557</link>
      <description>&lt;P&gt;hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;there are few edits in the below code&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am stuck with something please help me out with this..&lt;/P&gt;&lt;P&gt;I have a table as-&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;wr_no &amp;nbsp; &amp;nbsp;rfa_no &amp;nbsp; stk_ref &amp;nbsp; &amp;nbsp; status&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fp&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fp&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;y &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fp&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; nu&lt;/P&gt;&lt;P&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1&lt;/P&gt;&lt;P&gt;2 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; fp&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; gd&lt;/P&gt;&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 2&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; gp&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1 &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;x &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; gc&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to create &amp;nbsp;a flag as 0 for records having status 'nu' i.e for every &amp;nbsp;'nu' of &amp;nbsp;stk_ref i want to flag 0 &amp;nbsp;'fp' but not ('gd','gp','gc') with reference to my &lt;SPAN&gt;stk_ref ,wr_no and rfa_no.so&amp;nbsp;basically h&lt;/SPAN&gt;ere i want to flag 0&amp;nbsp;1st and 4th record&amp;nbsp;&lt;/P&gt;&lt;P&gt;because for the same stk_ref i got 'nu' which has unique &amp;nbsp;wr_no and unique rfa_no.there are &amp;nbsp;blank&amp;nbsp;&lt;SPAN&gt;stk_ref and status also for which the flag should be zero.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And&amp;nbsp;the table contains&amp;nbsp;5billion rows.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 01 Aug 2017 07:54:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377204#M90557</guid>
      <dc:creator>nid197</dc:creator>
      <dc:date>2017-08-01T07:54:54Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377210#M90558</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;something like that?&lt;/P&gt;&lt;PRE&gt;DATA have;
input wr_no rfa_no stk_ref $ status $;
datalines;
1 1 x fp
1 2 x fp
1 1 y fp
1 1 x nu
2 1 x fp
;
run;

PROC SQL;
   CREATE TABLE help AS
      SELECT a.*,b.flag
      FROM have a
      LEFT JOIN
        (SELECT *,1 AS flag
         FROM have
         WHERE status eq 'nu') b 
      ON a.wr_no eq b.wr_no
      AND a.rfa_no eq b.rfa_no
      AND a.stk_ref eq b.stk_ref
   ;
quit;

data want;
   set help;
   where flag ne 1;
   drop flag;
run;&lt;/PRE&gt;&lt;P&gt;Cheers&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2017 08:57:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377210#M90558</guid>
      <dc:creator>Oligolas</dc:creator>
      <dc:date>2017-07-19T08:57:38Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377219#M90560</link>
      <description>&lt;P&gt;With a data step:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input wr_no rfa_no stk_ref $ status $;
cards;
1               1          x              fp
1               2          x              fp
1               1           y             fp
1               1           x             nu
2               1           x             fp
;
run;

proc sort data=have;
by wr_no rfa_no stk_ref;
run;

data want;
merge
  have (in=a)
  have (in=b rename=(status=_status) where=(_status = 'nu'))
;
by wr_no rfa_no stk_ref;
if a and not b;
drop _status;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 19 Jul 2017 09:48:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377219#M90560</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2017-07-19T09:48:15Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377230#M90563</link>
      <description>&lt;P&gt;Thankyou the code works fine.&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2017 10:44:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377230#M90563</guid>
      <dc:creator>nid197</dc:creator>
      <dc:date>2017-07-19T10:44:18Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377232#M90565</link>
      <description>&lt;P&gt;Amazing!!thankyou so much..&lt;/P&gt;&lt;P&gt;just 1 que can you please explain me how for the other status='FP' it takes flag=1.?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2017 10:48:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377232#M90565</guid>
      <dc:creator>nid197</dc:creator>
      <dc:date>2017-07-19T10:48:04Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377251#M90569</link>
      <description>(SELECT *,1 AS flag FROM have WHERE status eq 'nu') b&lt;BR /&gt;is a subquery table where every row gets flag=1 and names it 'b' henceforth.&lt;BR /&gt;&lt;BR /&gt;SELECT a.*,b.flag&lt;BR /&gt;this selects all columns from 'a' and the column 'flag' from 'b' (which value is 1 everywhere)&lt;BR /&gt;&lt;BR /&gt;The join performed is done by comparing the wr_no, rft_no and stk_ref rows. For each match between table have(a) and the subquery(b), the value of the column flag is appended, that is appended with '1'&lt;BR /&gt;&lt;BR /&gt;Hope this is clear.&lt;BR /&gt;There are many join explanations on the web.&lt;BR /&gt;Maybe you can refer to:&lt;BR /&gt;&lt;A href="https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/" target="_blank"&gt;https://blog.codinghorror.com/a-visual-explanation-of-sql-joins/&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;Cheers</description>
      <pubDate>Wed, 19 Jul 2017 11:13:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377251#M90569</guid>
      <dc:creator>Oligolas</dc:creator>
      <dc:date>2017-07-19T11:13:00Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377271#M90575</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;with 5 billions rows, hash may be a much faster than my SQL:&lt;/P&gt;&lt;PRE&gt;data _NULL_;
   if 0 then set have;
   
   if _N_ eq 1 then do;
      declare hash hnu(dataset: 'work.have (where=(status eq "nu")) duplicate: "add"');
      hnu.defineKey('wr_no','rfa_no','stk_ref');
      hnu.defineDone();
      
      declare hash w(ordered: 'ascending');
      w.defineKey('wr_no','rfa_no','stk_ref');
      w.defineData('wr_no','rfa_no','stk_ref','status');
      w.defineDone();
   end;
   set have end=eof;

   if hnu.find()&amp;gt;0;
   if w.add()&amp;gt;=0;
   if eof then rc=w.output(dataset: 'work.want');
run;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2017 12:25:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377271#M90575</guid>
      <dc:creator>Oligolas</dc:creator>
      <dc:date>2017-07-19T12:25:33Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377284#M90580</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/77163"&gt;@Oligolas&lt;/a&gt; wrote:&lt;BR /&gt;
&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;with 5 billions rows, hash may be a much faster than my SQL:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Only if you have enough memory available:&lt;/P&gt;
&lt;P&gt;assuming that 10 % of observations have status = 'nu', you get 26 * 500,000,000 ~ 12.4 Gbytes raw size for the hash object, without the hash tree itself.&lt;/P&gt;
&lt;P&gt;Operations on datasets, OTOH, scale until you run out of disk space, which comes much later.&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2017 12:36:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377284#M90580</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2017-07-19T12:36:48Z</dc:date>
    </item>
    <item>
      <title>Re: Deleting duplicates based on multiple criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377309#M90588</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input wr_no rfa_no stk_ref $ status $;
cards;
1               1          x              fp
1               2          x              fp
1               1           y             fp
1               1           x             nu
2               1           x             fp
;
run;
proc sql;
select *
 from have
  where catx('|',wr_no ,rfa_no ,stk_ref)
   not in (select catx('|',wr_no ,rfa_no ,stk_ref) from have where status='nu');
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 19 Jul 2017 13:16:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Deleting-duplicates-based-on-multiple-criteria/m-p/377309#M90588</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-07-19T13:16:03Z</dc:date>
    </item>
  </channel>
</rss>

