<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Removing duplicate rows with additional conditions in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524932#M142789</link>
    <description>&lt;P&gt;Thank you sir!&lt;/P&gt;</description>
    <pubDate>Sun, 06 Jan 2019 22:01:50 GMT</pubDate>
    <dc:creator>Agent1592</dc:creator>
    <dc:date>2019-01-06T22:01:50Z</dc:date>
    <item>
      <title>Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524876#M142762</link>
      <description>&lt;P&gt;Dear SAS Community:&lt;/P&gt;&lt;P&gt;I have certain data. Basically it is panel data with company_id, year and quarter (qtr). I would like to remove duplicate records.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I know I can use the following command:&lt;/P&gt;&lt;P&gt;proc sort data=test nodupkey out=&lt;SPAN&gt;test1&lt;/SPAN&gt;;&lt;BR /&gt;by year qtr;&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;However, this randomly removes duplicates. I have certain variables (var1, var2, var3, and var4) that are percentages, and I would like to keep the maximum values for each year-quarter for var3 and var4. So, for example, in 1999 I have two different rows: one for which var3 and var4 are equal to 0.5 and another one for which var3 and var4 are equal to 1. How can I keep the one with the higher value?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data work.TEST;
  infile datalines dsd truncover;
  input company_id:BEST12. year:32. qtr:32. var1:32. var2:32. var3:32. var4:32.;
  format company_id BEST12.;
datalines4;
11334,1992,2,0,0,0,0
11334,1992,3,0,0,0,0
11334,1992,3,0,0,0,0
11334,1992,3,0,0,0,0
11334,1992,4,0,0,0,0
11334,1994,2,0,0,0,0
11334,1994,3,0,0,0,0
11334,1994,3,0,0,0,0
11334,1994,3,0,0,0,0
11334,1994,4,0,0,0,0
11334,1995,2,0,0,0,0
11334,1995,3,0,0,0,0
11334,1995,3,0,0,0,0
11334,1995,3,0,0,0,0
11334,1995,4,0,0,0,0
11334,1996,2,0,0,0,0
11334,1996,3,0,0,0,0
11334,1996,3,0,0,0,0
11334,1996,3,0,0,0,0
11334,1996,4,0,0,0,0
11334,1997,2,0,0,0,0
11334,1997,3,0,0,0,0
11334,1997,3,0,0,0,0
11334,1997,3,0,0,0,0
11334,1997,4,0,0,0,0
11334,1998,2,0,0,0,0
11334,1998,3,0,0,0,0
11334,1998,3,0,0,0,0
11334,1998,3,0,0,0,0
11334,1998,4,0,0,0,0
11334,1999,2,0.5,0.5,0.5,0.5
11334,1999,2,1,1,1,1
11334,1999,3,0.5,0.5,0.5,0.5
11334,1999,3,1,1,1,1
11334,1999,3,0.5,0.5,0.5,0.5
11334,1999,3,1,1,1,1
11334,1999,3,0.5,0.5,0.5,0.5
11334,1999,3,1,1,1,1
11334,1999,4,0.5,0.5,0.5,0.5
11334,1999,4,1,1,1,1
11334,2000,2,0,0,0,0
11334,2000,2,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,3,0,0,0,0
11334,2000,4,0,0,0,0
11334,2000,4,0,0,0,0
11334,2001,2,0,0,0,0
11334,2001,2,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,3,0,0,0,0
11334,2001,4,0,0,0,0
11334,2001,4,0,0,0,0
11334,2002,2,0,0,0,0
11334,2002,2,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,3,0,0,0,0
11334,2002,4,0,0,0,0
11334,2002,4,0,0,0,0
11334,2003,2,1,1,1,1
11334,2003,3,1,1,1,1
11334,2003,3,1,1,1,1
11334,2003,3,1,1,1,1
11334,2003,4,1,1,1,1
11334,2004,2,0,0,0,0
11334,2004,3,0,0,0,0
11334,2004,3,0,0,0,0
11334,2004,3,0,0,0,0
11334,2004,4,0,0,0,0
11334,2005,2,0,0,0,0
11334,2005,3,0,0,0,0
11334,2005,3,0,0,0,0
11334,2005,3,0,0,0,0
11334,2005,4,0,0,0,0
11334,2006,2,0,0,0,0
11334,2006,3,0,0,0,0
11334,2006,3,0,0,0,0
11334,2006,3,0,0,0,0
11334,2006,4,0,0,0,0
11334,2007,2,0,0,0,0
11334,2007,3,0,0,0,0
11334,2007,3,0,0,0,0
11334,2007,3,0,0,0,0
11334,2007,4,0,0,0,0
11334,2008,2,0,0,0,0
11334,2008,3,0,0,0,0
11334,2008,3,0,0,0,0
11334,2008,3,0,0,0,0
11334,2008,4,0,0,0,0
11334,2009,2,0,0,0,0
11334,2009,3,0,0,0,0
11334,2009,3,0,0,0,0
11334,2009,3,0,0,0,0
11334,2009,4,0,0,0,0
11334,2011,2,0,0.5,0.5,0.5
11334,2011,3,0,0.5,0.5,0.5
11334,2011,3,0,0.5,0.5,0.5
11334,2011,3,0,0.5,0.5,0.5
11334,2011,4,0,0.5,0.5,0.5
11334,2013,2,0.5,0.5,0.5,0.5
11334,2013,3,0.5,0.5,0.5,0.5
11334,2013,3,0.5,0.5,0.5,0.5
11334,2013,3,0.5,0.5,0.5,0.5
11334,2013,4,0.5,0.5,0.5,0.5
11334,2014,2,0,0,0,0
11334,2014,3,0,0,0,0
11334,2014,3,0,0,0,0
11334,2014,3,0,0,0,0
11334,2014,4,0,0,0,0
11334,2016,2,0,0,0,0
11334,2016,3,0,0,0,0
11334,2016,3,0,0,0,0
11334,2016,3,0,0,0,0
11334,2016,4,0,0,0,0
;;;;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 06 Jan 2019 10:13:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524876#M142762</guid>
      <dc:creator>Agent1592</dc:creator>
      <dc:date>2019-01-06T10:13:03Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524879#M142763</link>
      <description>&lt;P&gt;Sort by company_id, year, quarter (named qtr in the posted data)&amp;nbsp;&lt;EM&gt;and&lt;/EM&gt; the selection variable (var3 here), then use last. processing to keep the row with the highest value:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=have;
by company_id year quarter var3;
run;

data want;
set have;
by company_id year quarter;
if last.quarter;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 06 Jan 2019 10:56:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524879#M142763</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-01-06T10:56:07Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524909#M142773</link>
      <description>&lt;P&gt;Here is a hash approach that does not require sorting.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _null_;
   if _N_ = 1 then do;
      declare hash h(dataset:'test(obs=0)', ordered:'A');
      h.defineKey('company_id', 'year', 'qtr');
      h.defineData(all:'Y');
      h.defineDone();
   end;

   set test end=eof;

   if h.check() ne 0 then h.add();
   else do;
      _var1=var1;_var2=var2;_var3=var3;_var4=var4;
      rc=h.find();
      if var3 &amp;lt;= _var3 then do;
         var1=_var1;var2=_var2;var3=_var3;var4=_var4;
         h.replace();
      end;
   end;

   if eof then h.output(dataset:'want');
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/11562"&gt;@Kurt_Bremser&lt;/a&gt;'s solution and mine generate exactly the same data set, though &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 06 Jan 2019 17:22:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524909#M142773</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2019-01-06T17:22:59Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524919#M142780</link>
      <description>&lt;P&gt;SAS offers multiple approaches to the same problem.&lt;/P&gt;&lt;P&gt;This is mine:&lt;/P&gt;&lt;P&gt;proc sql;&lt;BR /&gt;create table work.test2 as&lt;BR /&gt;select year,qtr , Max(var3) as var3, Max(var4) as var4 from work.test&lt;BR /&gt;group by year, qtr&lt;BR /&gt;order by year, qtr;&lt;BR /&gt;quit;&lt;/P&gt;</description>
      <pubDate>Sun, 06 Jan 2019 17:56:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524919#M142780</guid>
      <dc:creator>Sajid01</dc:creator>
      <dc:date>2019-01-06T17:56:14Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524924#M142783</link>
      <description>If you want the MAX of each variable within a particular grouping, PROC MEANS/SUMMARY is the most efficient method. &lt;BR /&gt;&lt;BR /&gt;proc means data=have noprint Nway;&lt;BR /&gt;class company year qtr;&lt;BR /&gt;output out=want max = / autoname autolabel;&lt;BR /&gt;run;</description>
      <pubDate>Sun, 06 Jan 2019 19:23:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524924#M142783</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2019-01-06T19:23:43Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524931#M142788</link>
      <description>Thank you!</description>
      <pubDate>Sun, 06 Jan 2019 22:01:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524931#M142788</guid>
      <dc:creator>Agent1592</dc:creator>
      <dc:date>2019-01-06T22:01:33Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with additional conditions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524932#M142789</link>
      <description>&lt;P&gt;Thank you sir!&lt;/P&gt;</description>
      <pubDate>Sun, 06 Jan 2019 22:01:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-additional-conditions/m-p/524932#M142789</guid>
      <dc:creator>Agent1592</dc:creator>
      <dc:date>2019-01-06T22:01:50Z</dc:date>
    </item>
  </channel>
</rss>

