<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: dedup based on a criteria in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/dedup-based-on-a-criteria/m-p/637308#M189448</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/206798"&gt;@nickspencer&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Here's a simple solution:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input id condition $ date :mmddyy.;
format date mmddyy10.;
cards;
1234 NO 3/1/2020
1234 A-1 3/5/2020
1234 P-1 3/2/2020
2345 NO 3/5/2020
2345 NO 3/1/2020 
;

proc sql;
create view _tmp as
select * from have
order by id, condition='NO', date;
quit;

data want;
set _tmp;
by id;
if first.id;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;A more robust ORDER BY clause might be:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;order by id, missing(date), condition='NO', date, condition;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This would avoid the selection of missing dates if possible (including the case that only condition='NO' occurs with non-missing dates). Moreover, in case of tied observations (same date) the alphabetical order of (not-'NO') conditions would serve as the tie-breaker.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If your dataset is very large and already sorted by ID, a different solution (without sorting) might be more efficient.&lt;/P&gt;</description>
    <pubDate>Fri, 03 Apr 2020 17:07:17 GMT</pubDate>
    <dc:creator>FreelanceReinh</dc:creator>
    <dc:date>2020-04-03T17:07:17Z</dc:date>
    <item>
      <title>dedup based on a criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/dedup-based-on-a-criteria/m-p/637299#M189443</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a dataset with columns ID,&amp;nbsp; condition, date and others. I want to dedup the rows by ID and the earliest date but the issue I am having is if the condition value is 'NO', the other values of column condition&amp;nbsp;should take precedence irrespective of the date.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;ID&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; CONDITION&amp;nbsp;&amp;nbsp;&amp;nbsp; DATE&lt;/P&gt;&lt;P&gt;1234&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; NO&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3/1/2020&lt;/P&gt;&lt;P&gt;1234&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; A-1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3/5/2020&lt;/P&gt;&lt;P&gt;1234&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;P-1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3/2/2020&lt;/P&gt;&lt;P&gt;2345&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; NO&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;3/5/2020&lt;/P&gt;&lt;P&gt;2345&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;NO&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;3/1/2020&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The result should have:&lt;/P&gt;&lt;P&gt;ID&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; CONDITION&amp;nbsp;&amp;nbsp;&amp;nbsp; DATE&lt;/P&gt;&lt;P&gt;1234&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;P-1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3/2/2020&lt;/P&gt;&lt;P&gt;2345&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; NO&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;3/1/2020&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hope this makes sense.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any&amp;nbsp;suggestion is appreciated.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 03 Apr 2020 16:20:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/dedup-based-on-a-criteria/m-p/637299#M189443</guid>
      <dc:creator>nickspencer</dc:creator>
      <dc:date>2020-04-03T16:20:02Z</dc:date>
    </item>
    <item>
      <title>Re: dedup based on a criteria</title>
      <link>https://communities.sas.com/t5/SAS-Programming/dedup-based-on-a-criteria/m-p/637308#M189448</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/206798"&gt;@nickspencer&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Here's a simple solution:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input id condition $ date :mmddyy.;
format date mmddyy10.;
cards;
1234 NO 3/1/2020
1234 A-1 3/5/2020
1234 P-1 3/2/2020
2345 NO 3/5/2020
2345 NO 3/1/2020 
;

proc sql;
create view _tmp as
select * from have
order by id, condition='NO', date;
quit;

data want;
set _tmp;
by id;
if first.id;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;A more robust ORDER BY clause might be:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;order by id, missing(date), condition='NO', date, condition;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This would avoid the selection of missing dates if possible (including the case that only condition='NO' occurs with non-missing dates). Moreover, in case of tied observations (same date) the alphabetical order of (not-'NO') conditions would serve as the tie-breaker.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If your dataset is very large and already sorted by ID, a different solution (without sorting) might be more efficient.&lt;/P&gt;</description>
      <pubDate>Fri, 03 Apr 2020 17:07:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/dedup-based-on-a-criteria/m-p/637308#M189448</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2020-04-03T17:07:17Z</dc:date>
    </item>
  </channel>
</rss>

