<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Duplicate Count with Datastep in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762781#M241537</link>
    <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data customers;
input id name $ 12.  Age  Address $ 12. Salary ;
format Salary 10.2 ;
datalines;
1  Ramesh      32   Ahmedabad    2000.00  
2  Khilan      25   Delhi        1500.00  
3  kaushik     23   Kota         2000.00  
4  Chaitali    25   Mumbai       6500.00  
5  Hardik      27   Bhopal       8500.00  
6  Komal       22   MP           4500.00  
7  Muffy       24   Indore      10000.00 
;
proc print;
run;


proc sort data= Customers;
by salary;
run;



proc sql;
select salary from customers
order by salary;
quit;

proc sql;
select distinct salary from customers
order by salary;
quit;


/* Count same salary */


proc sort data=Customers;
by salary;
run;

data dup_count (Keep= Salary Count)   ;
set Customers ;
by salary;
if first.salary then Count=0;
count+1;
if last.salary;
proc print noobs;
run;


/* Keep only Duplicates obs using Datastep */
data dup_count    ;
set Customers ;
by salary;
if not(first.salary  and last.salary) then output ;
proc print noobs;
run;

/* Keep only Duplicates rows using  proc sql */

proc sql; 
    
    select Salary, count(*) as count 
    from customers
    group by Salary
    having COUNT &amp;gt; 1 
    ;
quit;


data dup_count;
set customers;
by salary;
if first.salary then Count=0;
count+1;
if last.salary;
proc print;
run;


&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Required output using datastep&lt;/P&gt;
&lt;TABLE class="table" aria-label="Query Results"&gt;
&lt;THEAD&gt;
&lt;TR&gt;
&lt;TH class="r b header" scope="col"&gt;Salary&lt;/TH&gt;
&lt;TH class="r b header" scope="col"&gt;count&lt;/TH&gt;
&lt;/TR&gt;
&lt;/THEAD&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD class="r data"&gt;2000.00&lt;/TD&gt;
&lt;TD class="r data"&gt;2&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 20 Aug 2021 08:32:39 GMT</pubDate>
    <dc:creator>BrahmanandaRao</dc:creator>
    <dc:date>2021-08-20T08:32:39Z</dc:date>
    <item>
      <title>Duplicate Count with Datastep</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762781#M241537</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data customers;
input id name $ 12.  Age  Address $ 12. Salary ;
format Salary 10.2 ;
datalines;
1  Ramesh      32   Ahmedabad    2000.00  
2  Khilan      25   Delhi        1500.00  
3  kaushik     23   Kota         2000.00  
4  Chaitali    25   Mumbai       6500.00  
5  Hardik      27   Bhopal       8500.00  
6  Komal       22   MP           4500.00  
7  Muffy       24   Indore      10000.00 
;
proc print;
run;


proc sort data= Customers;
by salary;
run;



proc sql;
select salary from customers
order by salary;
quit;

proc sql;
select distinct salary from customers
order by salary;
quit;


/* Count same salary */


proc sort data=Customers;
by salary;
run;

data dup_count (Keep= Salary Count)   ;
set Customers ;
by salary;
if first.salary then Count=0;
count+1;
if last.salary;
proc print noobs;
run;


/* Keep only Duplicates obs using Datastep */
data dup_count    ;
set Customers ;
by salary;
if not(first.salary  and last.salary) then output ;
proc print noobs;
run;

/* Keep only Duplicates rows using  proc sql */

proc sql; 
    
    select Salary, count(*) as count 
    from customers
    group by Salary
    having COUNT &amp;gt; 1 
    ;
quit;


data dup_count;
set customers;
by salary;
if first.salary then Count=0;
count+1;
if last.salary;
proc print;
run;


&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Required output using datastep&lt;/P&gt;
&lt;TABLE class="table" aria-label="Query Results"&gt;
&lt;THEAD&gt;
&lt;TR&gt;
&lt;TH class="r b header" scope="col"&gt;Salary&lt;/TH&gt;
&lt;TH class="r b header" scope="col"&gt;count&lt;/TH&gt;
&lt;/TR&gt;
&lt;/THEAD&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD class="r data"&gt;2000.00&lt;/TD&gt;
&lt;TD class="r data"&gt;2&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 20 Aug 2021 08:32:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762781#M241537</guid>
      <dc:creator>BrahmanandaRao</dc:creator>
      <dc:date>2021-08-20T08:32:39Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate Count with Datastep</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762782#M241538</link>
      <description>&lt;P&gt;What is the logic here? Do you simply want a frequency table containing only the salaries with a frequency &amp;gt; 1?&lt;/P&gt;</description>
      <pubDate>Fri, 20 Aug 2021 08:35:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762782#M241538</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2021-08-20T08:35:50Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate Count with Datastep</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762796#M241543</link>
      <description>&lt;P&gt;If so, then the data step is not the right tool:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data customers;
input id name $ 12.  Age  Address $ 12. Salary ;
format Salary 10.2 ;
datalines;
1  Ramesh      32   Ahmedabad    2000.00  
2  Khilan      25   Delhi        1500.00  
3  kaushik     23   Kota         2000.00  
4  Chaitali    25   Mumbai       6500.00  
5  Hardik      27   Bhopal       8500.00  
6  Komal       22   MP           4500.00  
7  Muffy       24   Indore      10000.00 
;

proc summary data = customers nway;
   class salary;
   var salary;
   output out = want(where = (count &amp;gt; 1) drop = _:) n = count;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 20 Aug 2021 09:40:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762796#M241543</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2021-08-20T09:40:19Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate Count with Datastep</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762804#M241549</link>
      <description>&lt;P&gt;Fully agree with &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/31304"&gt;@PeterClemmensen&lt;/a&gt;: the data step is not the first choice if you have to count.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you have to use a data step, sorting the data with proc sort beforehand is necessary to keep the data step as simple as possible.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=customers out=sorted(keep= Salary);
   by salary;
run;

data counts;
   set sorted;
   by Salary;
   where Salary = 2000;

   if first.Salary then count = 0;

   count + 1;

   if last.Salary;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Another alternative is proc freq:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc freq data=customers noprint;
   table salary / out=freq_count(where=(salary=2000) drop= percent);
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 20 Aug 2021 10:40:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762804#M241549</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2021-08-20T10:40:51Z</dc:date>
    </item>
    <item>
      <title>Re: Duplicate Count with Datastep</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762809#M241552</link>
      <description>&lt;P&gt;Also, if you have to do this in a data step, here is a hash approach:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data customers;
input id name $ 12.  Age  Address $ 12. Salary ;
format Salary 10.2 ;
datalines;
1  Ramesh      32   Ahmedabad    2000.00  
2  Khilan      25   Delhi        1500.00  
3  kaushik     23   Kota         2000.00  
4  Chaitali    25   Mumbai       6500.00  
5  Hardik      27   Bhopal       8500.00  
6  Komal       22   MP           4500.00  
7  Muffy       24   Indore      10000.00 
;

data _null_;
   dcl hash h();
   h.definekey('Salary');
   h.definedata('Salary', 'count');
   h.definedone();
 
   do until (z);
      set customers end = z;
      if h.find() ne 0 then count = 0;
      count + 1;
      h.replace();
   end;
 
   h.output(dataset : 'want(where = (count &amp;gt; 1))');
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 20 Aug 2021 11:22:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Duplicate-Count-with-Datastep/m-p/762809#M241552</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2021-08-20T11:22:29Z</dc:date>
    </item>
  </channel>
</rss>

