<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Group by and count removes duplicate rows in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862240#M340566</link>
    <description>&lt;P&gt;The error is in your data step. The last step has a semi colon is at the incorrect location just after Carl&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sajid01_0-1677886261135.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/81061iD47A370F9AB5739C/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sajid01_0-1677886261135.png" alt="Sajid01_0-1677886261135.png" /&gt;&lt;/span&gt;&lt;BR /&gt;The corrected code is shown below. I moved the semicolon to the next line. &lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data t;
input ID$ name$;
cards;
a010 Steve
a010 James
a011 Harvey
a012 Carl
a012 Carl
;
run;

proc sql;
select ID, NAME, count(*) as n
from t
group by id, name;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The result would be as expected. Nothing is deleted&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sajid01_1-1677886414291.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/81062iAF2C06FEE686CCC7/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sajid01_1-1677886414291.png" alt="Sajid01_1-1677886414291.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 03 Mar 2023 23:34:08 GMT</pubDate>
    <dc:creator>Sajid01</dc:creator>
    <dc:date>2023-03-03T23:34:08Z</dc:date>
    <item>
      <title>Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862104#M340519</link>
      <description>&lt;P&gt;I am using a count + group by to flag rows that are complete duplicates. The result removes the extra rows, while I prefer to keep the original dataset, with just an added column. I don't use a select distinct so can't understand why rows are being deleted. Any clue why it happens and how to avoid it?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data t;
input ID$ name$;
cards;
a010 Steve
a010 James
a011 Harvey
a012 Carl
a012 Carl;
run;

proc sql;
create table dup_flag as
select *, count(*) as n
from t
group by ID, name;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 03 Mar 2023 13:02:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862104#M340519</guid>
      <dc:creator>SarahDew</dc:creator>
      <dc:date>2023-03-03T13:02:20Z</dc:date>
    </item>
    <item>
      <title>Re: Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862106#M340520</link>
      <description>&lt;P&gt;Hi &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/244638"&gt;@SarahDew&lt;/a&gt;, when you use GROUP BY, it's going to collapse those columns into a single record based on ID and NAME. If you want to keep the original structure, I would just join the original table to your original query:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data t;
input ID$ name :$6.;
cards;
a010 Steve
a010 James
a011 Harvey
a012 Carl
a012 Carl
;
run;

proc sql;
	create table dup_flag as
		select 
			t.*
			, a.n
		from 
			t
		left join
			(
				select
					id
					, name
					, count(*) as n
				from
					t
				group by
					id
					, name
			) a
			on t.id = a.id
			and t.name = a.name
	; 
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;PRE&gt;ID	name	n
a010	James	1
a010	Steve	1
a011	Harvey	1
a012	Carl	2
a012	Carl	2&lt;/PRE&gt;
&lt;P&gt;I think that's what you're trying to get at, but I may not have understood the question. Let me know - happy to help.&lt;/P&gt;</description>
      <pubDate>Fri, 03 Mar 2023 13:20:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862106#M340520</guid>
      <dc:creator>maguiremq</dc:creator>
      <dc:date>2023-03-03T13:20:05Z</dc:date>
    </item>
    <item>
      <title>Re: Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862140#M340540</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/244638"&gt;@SarahDew&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Alternatively, you can group by a unique key derived from &lt;FONT face="courier new,courier"&gt;ID&lt;/FONT&gt; and &lt;FONT face="courier new,courier"&gt;name&lt;/FONT&gt;&amp;nbsp;(instead of by &lt;FONT face="courier new,courier"&gt;ID, name&lt;/FONT&gt;). This will trigger automatic remerging (see the note in the log "&lt;FONT face="courier new,courier" color="#3366FF"&gt;The query requires remerging summary statistics back with the original data.&lt;/FONT&gt;") and thus prevent the unwanted aggregation. For your example data (and in most other cases) a simple concatenation works as the unique key:&lt;/P&gt;
&lt;PRE&gt;&lt;FONT size="4"&gt;group by ID&lt;FONT color="#3366FF"&gt;&lt;STRONG&gt;||&lt;/STRONG&gt;&lt;/FONT&gt;name;&lt;/FONT&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 03 Mar 2023 15:26:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862140#M340540</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2023-03-03T15:26:21Z</dc:date>
    </item>
    <item>
      <title>Re: Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862199#M340554</link>
      <description>&lt;P&gt;If all your variables are in the GROUP BY clause, you must request the remerge explicitly, for example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
/*create table dup_flag as */
select *
from t natural join (
  select *, count(*) as n
  from t
  group by ID, name);
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 03 Mar 2023 19:03:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862199#M340554</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2023-03-03T19:03:10Z</dc:date>
    </item>
    <item>
      <title>Re: Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862202#M340556</link>
      <description>&lt;P&gt;... Or you can simply fool SAS/SQL into auto-remerge by pretending to perform an operation on one of the GROUP BY columns, for example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
/*create table dup_flag as */
  select *, count(*) as n
  from t
  group by ID, trim(name);
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 03 Mar 2023 19:13:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862202#M340556</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2023-03-03T19:13:50Z</dc:date>
    </item>
    <item>
      <title>Re: Group by and count removes duplicate rows</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862240#M340566</link>
      <description>&lt;P&gt;The error is in your data step. The last step has a semi colon is at the incorrect location just after Carl&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sajid01_0-1677886261135.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/81061iD47A370F9AB5739C/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sajid01_0-1677886261135.png" alt="Sajid01_0-1677886261135.png" /&gt;&lt;/span&gt;&lt;BR /&gt;The corrected code is shown below. I moved the semicolon to the next line. &lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data t;
input ID$ name$;
cards;
a010 Steve
a010 James
a011 Harvey
a012 Carl
a012 Carl
;
run;

proc sql;
select ID, NAME, count(*) as n
from t
group by id, name;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The result would be as expected. Nothing is deleted&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sajid01_1-1677886414291.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/81062iAF2C06FEE686CCC7/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sajid01_1-1677886414291.png" alt="Sajid01_1-1677886414291.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 03 Mar 2023 23:34:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Group-by-and-count-removes-duplicate-rows/m-p/862240#M340566</guid>
      <dc:creator>Sajid01</dc:creator>
      <dc:date>2023-03-03T23:34:08Z</dc:date>
    </item>
  </channel>
</rss>

