<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to deduplicate data using Proc Sql with 2 different criteria. in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899702#M355581</link>
    <description>&lt;P&gt;Here one option:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE;
  infile datalines dsd truncover;
  input ID $ date :mmddyy10. test_scores Pending $;
  format date mmddyy10.;
  datalines;
2A,9/5/19,100,YES
2A,9/5/19,.,NO
2A,9/5/19,.,NO
3C,10/21/21,.,YES
3C,10/21/21,.,NO
3C,10/21/21,.,NO
;

proc sql;
  create view v_inter as
  select *
  from have
  order by id, test_scores, Pending
  ;
quit;

/* alternative to above SQL: Simple proc sort */
/*proc sort data=have out=v_inter;*/
/*  by id test_scores pending;*/
/*run;*/

data want;
  set v_inter;
  by id;
  if last.id;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1698111514995.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/89043iE1A4BEA57294B6ED/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Patrick_0-1698111514995.png" alt="Patrick_0-1698111514995.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 24 Oct 2023 10:24:30 GMT</pubDate>
    <dc:creator>Patrick</dc:creator>
    <dc:date>2023-10-24T10:24:30Z</dc:date>
    <item>
      <title>How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899695#M355577</link>
      <description>&lt;P&gt;Hello,&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to remove duplicates from a large dataset and have been having trouble doing that. I was using proc sql to do it but it seems like it does it for one criteria but not the other. Specifically, it will work for the max test_scores but it won't work for "&lt;CODE class=""&gt; test_scores is null and Pending = 'YES'"&lt;/CODE&gt;&amp;nbsp;. I did this proc sql separately and it will work for the null condition but I want this done all together. Below is sample code and data what my example data looks like and what I want.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;proc sql;
create table WANT as
select*
from HAVE
group by ID
having test_scores = max(test_scores) or test_scores is null and Pending = 'YES';
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Have&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;date&lt;/TD&gt;&lt;TD&gt;test_scores&lt;/TD&gt;&lt;TD&gt;Pending&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2A&lt;/TD&gt;&lt;TD&gt;9/5/19&lt;/TD&gt;&lt;TD&gt;100&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2A&lt;/TD&gt;&lt;TD&gt;9/5/19&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2A&lt;/TD&gt;&lt;TD&gt;9/5/19&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3C&lt;/TD&gt;&lt;TD&gt;10/21/21&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3C&lt;/TD&gt;&lt;TD&gt;10/21/21&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3C&lt;/TD&gt;&lt;TD&gt;10/21/21&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Want&amp;nbsp;&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;date&lt;/TD&gt;&lt;TD&gt;test_scores&lt;/TD&gt;&lt;TD&gt;Pending&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2A&lt;/TD&gt;&lt;TD&gt;9/5/19&lt;/TD&gt;&lt;TD&gt;100&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3C&lt;/TD&gt;&lt;TD&gt;10/21/21&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;</description>
      <pubDate>Mon, 23 Oct 2023 23:15:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899695#M355577</guid>
      <dc:creator>rebelde52</dc:creator>
      <dc:date>2023-10-23T23:15:11Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899700#M355579</link>
      <description>&lt;P&gt;Please help us help you and provide your HAVE data via working SAS datastep code that creates the table.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;A group by requires an aggregate function in the select clause. Your code likely returns some log message that the group by has been converted to an order by.&lt;/P&gt;</description>
      <pubDate>Tue, 24 Oct 2023 01:25:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899700#M355579</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2023-10-24T01:25:35Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899701#M355580</link>
      <description>&lt;PRE&gt;&lt;CODE class=""&gt;data HAVE;
   input ID $ date mmddyy10. test_scores Pending $;
   format date mmddyy10.;
   datalines;
2A 9/5/19  100 YES
2A 9/5/19  .    NO
2A 9/5/19  .    NO
3C 10/21/21 .  YES
3C 10/21/21 .  NO
3C 10/21/21 .  NO
;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 24 Oct 2023 01:34:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899701#M355580</guid>
      <dc:creator>rebelde52</dc:creator>
      <dc:date>2023-10-24T01:34:24Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899702#M355581</link>
      <description>&lt;P&gt;Here one option:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE;
  infile datalines dsd truncover;
  input ID $ date :mmddyy10. test_scores Pending $;
  format date mmddyy10.;
  datalines;
2A,9/5/19,100,YES
2A,9/5/19,.,NO
2A,9/5/19,.,NO
3C,10/21/21,.,YES
3C,10/21/21,.,NO
3C,10/21/21,.,NO
;

proc sql;
  create view v_inter as
  select *
  from have
  order by id, test_scores, Pending
  ;
quit;

/* alternative to above SQL: Simple proc sort */
/*proc sort data=have out=v_inter;*/
/*  by id test_scores pending;*/
/*run;*/

data want;
  set v_inter;
  by id;
  if last.id;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1698111514995.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/89043iE1A4BEA57294B6ED/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Patrick_0-1698111514995.png" alt="Patrick_0-1698111514995.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 24 Oct 2023 10:24:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899702#M355581</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2023-10-24T10:24:30Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899716#M355585</link>
      <description>&lt;P&gt;Form the data you have posted something like&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set have;
  where Pending = 'YES';
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;would create the dataset you expect, but most likely only for the small subset posted.&lt;/P&gt;
&lt;P&gt;Can you add some more observations?&lt;/P&gt;
&lt;P&gt;What would you expect in the output dataset, if&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;in the second obs test_score is 200&lt;/LI&gt;
&lt;LI&gt;id=c3 and test_score = 1 in the last obs?&lt;/LI&gt;
&lt;/UL&gt;</description>
      <pubDate>Tue, 24 Oct 2023 06:39:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899716#M355585</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2023-10-24T06:39:13Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899723#M355587</link>
      <description>&lt;P&gt;Look at your log. A SELECT * will invariably cause a "automatic remerge", unless&amp;nbsp;&lt;U&gt;all&lt;/U&gt; variables in the dataset are used in the GROUP BY.&lt;/P&gt;
&lt;P&gt;Assuming Pending is character, without a special display format, this should do it:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=have;
by id test_scores pending;
run;

data want;
set have;
by id;
if last.id;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 24 Oct 2023 08:14:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899723#M355587</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2023-10-24T08:14:57Z</dc:date>
    </item>
    <item>
      <title>Re: How to deduplicate data using Proc Sql with 2 different criteria.</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899761#M355607</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*
That is not a good behavior for coding.
Assuming I understood what you need.
*/
data HAVE;
  infile datalines dsd truncover;
  input ID $ date :mmddyy10. test_scores Pending $;
  format date mmddyy10.;
  datalines;
2A,9/5/19,100,YES
2A,9/5/19,.,NO
2A,9/5/19,.,NO
3C,10/21/21,.,YES
3C,10/21/21,.,NO
3C,10/21/21,.,NO
;

proc sql;
create table WANT as
select*
from HAVE
group by ID
having (test_scores = max(test_scores)) and 
       (test_scores is not missing or Pending='YES');
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 24 Oct 2023 11:26:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-deduplicate-data-using-Proc-Sql-with-2-different-criteria/m-p/899761#M355607</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2023-10-24T11:26:57Z</dc:date>
    </item>
  </channel>
</rss>

