<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Duplicate rows in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336297#M76217</link>
    <description>&lt;P&gt;Count(*) counts rows, it is not checking columns. The way the query I sent works is it looks for dups by a &lt;EM&gt;combination&lt;/EM&gt; of the three columns. So if there are multiple rows with the same values for SUBJECT, MHTERM, MHST, then those will be returned in the result set. If you are trying to look for dups either by SUBJECT or by MHTERM or by MHST independently of the other two columns, then it will be a different query. Can you try the query I sent and see if the output is&amp;nbsp;what you need?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table dup as 
select *
from mh
group by subject, mhterm, mhts /* this is grouping the data, every combination of values in these three columns will be put in a separate group */
having count(*) &amp;gt;1; /* count(*) simply counts rows in each group AFTER grouping is completed above */
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;If there is more than 1 row within each group that is created by 'group by' statement, then all those rows are considered dups and returned.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 27 Feb 2017 18:54:01 GMT</pubDate>
    <dc:creator>nehalsanghvi</dc:creator>
    <dc:date>2017-02-27T18:54:01Z</dc:date>
    <item>
      <title>Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336287#M76208</link>
      <description>&lt;P&gt;Dear All,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can some one help me with output for identifying the duplicates. I am using the below code but i would like to output all the duplicates into output based on the three keys.I am thinking SQL should be good option but I am not sure of using sql.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;PROC SORT DATA=MH OUT=MH1 DUPOUT=MH_DUP NODUPKEY;&lt;BR /&gt;BY SUBJECT MHTERM MHST;&lt;BR /&gt;RUN;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;/* CAN SOMEONEHELP IN CORRECTING THE BELOW CODE&lt;/P&gt;
&lt;P&gt;proc sql;&lt;/P&gt;
&lt;P&gt;create table dup as&amp;nbsp;&lt;/P&gt;
&lt;P&gt;select * from mh&lt;/P&gt;
&lt;P&gt;having count(subject,mhterm,mhts) &amp;gt;1;&lt;/P&gt;
&lt;P&gt;quit;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks&lt;/P&gt;
&lt;P&gt;Rakesh&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 18:09:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336287#M76208</guid>
      <dc:creator>rakeshvvv</dc:creator>
      <dc:date>2017-02-27T18:09:23Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336291#M76211</link>
      <description>&lt;P&gt;Try this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table dup as 
select *
from mh
group by subject, mhterm, mhts
having count(*) &amp;gt;1;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 27 Feb 2017 18:22:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336291#M76211</guid>
      <dc:creator>nehalsanghvi</dc:creator>
      <dc:date>2017-02-27T18:22:38Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336294#M76214</link>
      <description>&lt;P&gt;There may be other possibilities, but this will work:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc sort data=have;&lt;/P&gt;
&lt;P&gt;by subject mhterm mhst;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;data want;&lt;/P&gt;
&lt;P&gt;set have;&lt;/P&gt;
&lt;P&gt;by subject mhterm mhst;&lt;/P&gt;
&lt;P&gt;if first.subject=first.mhterm=first.mhst=1 then delete;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 18:33:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336294#M76214</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2017-02-27T18:33:41Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336295#M76215</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;My Dataset have more then ten collumns but I would need to find duplicates for only three collumns &lt;STRONG&gt;SUBJECT, MHTERM, MHST.&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;using &amp;nbsp;count(*) would check for all the collumns rather then&amp;nbsp;SUBJECT, MHTERM, MHST. Can you correct me if my assumption is wrong.&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 18:38:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336295#M76215</guid>
      <dc:creator>rakeshvvv</dc:creator>
      <dc:date>2017-02-27T18:38:15Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336297#M76217</link>
      <description>&lt;P&gt;Count(*) counts rows, it is not checking columns. The way the query I sent works is it looks for dups by a &lt;EM&gt;combination&lt;/EM&gt; of the three columns. So if there are multiple rows with the same values for SUBJECT, MHTERM, MHST, then those will be returned in the result set. If you are trying to look for dups either by SUBJECT or by MHTERM or by MHST independently of the other two columns, then it will be a different query. Can you try the query I sent and see if the output is&amp;nbsp;what you need?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table dup as 
select *
from mh
group by subject, mhterm, mhts /* this is grouping the data, every combination of values in these three columns will be put in a separate group */
having count(*) &amp;gt;1; /* count(*) simply counts rows in each group AFTER grouping is completed above */
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;If there is more than 1 row within each group that is created by 'group by' statement, then all those rows are considered dups and returned.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 18:54:01 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336297#M76217</guid>
      <dc:creator>nehalsanghvi</dc:creator>
      <dc:date>2017-02-27T18:54:01Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336316#M76228</link>
      <description>&lt;P&gt;The query is working well for me. Now, I understood how the code is working......Thanks for the detailed explanation.&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2017 19:38:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-rows/m-p/336316#M76228</guid>
      <dc:creator>rakeshvvv</dc:creator>
      <dc:date>2017-02-27T19:38:16Z</dc:date>
    </item>
  </channel>
</rss>

