<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Distinct count and group by multiple variables in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586512#M14516</link>
    <description>&lt;P&gt;You can try below code&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data welcome;
infile datalines dlm=",";
input ID $ DeptID $ Name :$20. svcdate :yymmdd10. Fiscal_Year Hours Amount ;
format svcdate yymmddd10.;
datalines;
1234, 7, East, 2019-01-01, 2019, 1.50, 109.7
1234, 7, East, 2019-01-01, 2019, 7.50, 509.7
6543, 7, East, 2019-01-1, 2019, 2.75, 340.82
7847, 8, West, 2019-02-01, 2019, 5, 7000
5818, 10, North, 2019-03-01, 2019, 6, 1032
0844, 3, South, 2019-04-01, 2019, 40, 855
4499, 5, West, 2018-09-01, 2019, 10, 790
1234, 7, East, 2019-09-01, 2019, 6.50, 1900.7
3000, 2, North, 2018-12-01, 2019, 11.75, 874.79
3785, 27, West, 2018-11-01, 2019, 20.3, 450.64

;
run;

proc sql;
create table want as select DeptID, Name, svcdate, Fiscal_Year , sum(Hours) as hours, sum(amount) as amount, count(id) as count 
from welcome group by DeptID, Name, svcdate, Fiscal_Year ;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Thu, 05 Sep 2019 18:33:31 GMT</pubDate>
    <dc:creator>Jagadishkatam</dc:creator>
    <dc:date>2019-09-05T18:33:31Z</dc:date>
    <item>
      <title>Distinct count and group by multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586498#M14508</link>
      <description>&lt;P&gt;I am trying to generate a summary table, but I can't seem to think of how to execute it. What I need is to add a column with distinct count of ID by DeptID (or name) and by month of service. The 'hours' and 'amount' columns would need to be summed accordingly. I currently have a table but I would like it to contain distinct count of ID.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;proc means data = services_fortab noprint nway sum;&lt;BR /&gt;var hours paid;&lt;BR /&gt;class vendorid vendorname svc_month Fiscal_Year SvcLocDesc ;&lt;BR /&gt;output out = services_fortab2(drop = _TYPE_ _FREQ_) sum=;&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data welcome;&lt;BR /&gt;infile datalines dlm=",";&lt;BR /&gt;input ID $ DeptID $ Name :$20. svcdate :yymmdd10. Fiscal_Year Hours Amount ;&lt;BR /&gt;format svcdate yymmddd10.;&lt;BR /&gt;datalines;&lt;BR /&gt;1234, 7, East, 2019-01-01, 2019, 1.50, 109.7&lt;BR /&gt;1234, 7, East, 2019-01-01, 2019, 7.50, 509.7&lt;BR /&gt;6543, 7, East, 2019-01-1, 2019, 2.75, 340.82&lt;BR /&gt;7847, 8, West, 2019-02-01, 2019, 5, 7000&lt;BR /&gt;5818, 10, North, 2019-03-01, 2019, 6, 1032&lt;BR /&gt;0844, 3, South, 2019-04-01, 2019, 40, 855&lt;BR /&gt;4499, 5, West, 2018-09-01, 2019, 10, 790&lt;BR /&gt;1234, 7, East, 2019-09-01, 2019, 6.50, 1900.7&lt;BR /&gt;3000, 2, North, 2018-12-01, 2019, 11.75, 874.79&lt;BR /&gt;3785, 27, West, 2018-11-01, 2019, 20.3, 450.64&lt;/P&gt;&lt;P&gt;;&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'd like the output to look something like this:&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;DeptID&lt;/TD&gt;&lt;TD&gt;Name&lt;/TD&gt;&lt;TD&gt;svcdate&lt;/TD&gt;&lt;TD&gt;Fiscal 
Year&lt;/TD&gt;&lt;TD&gt;Hours&lt;/TD&gt;&lt;TD&gt;Amount&lt;/TD&gt;&lt;TD&gt;Count&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;East&lt;/TD&gt;&lt;TD&gt;1/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;10&lt;/TD&gt;&lt;TD&gt;5324&lt;/TD&gt;&lt;TD&gt;50&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;East&lt;/TD&gt;&lt;TD&gt;3/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;756&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;8&lt;/TD&gt;&lt;TD&gt;West&lt;/TD&gt;&lt;TD&gt;2/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;67&lt;/TD&gt;&lt;TD&gt;2366&lt;/TD&gt;&lt;TD&gt;34&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;8&lt;/TD&gt;&lt;TD&gt;West&lt;/TD&gt;&lt;TD&gt;4/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;98&lt;/TD&gt;&lt;TD&gt;7899&lt;/TD&gt;&lt;TD&gt;467&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;10&lt;/TD&gt;&lt;TD&gt;North&lt;/TD&gt;&lt;TD&gt;2/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;33&lt;/TD&gt;&lt;TD&gt;956&lt;/TD&gt;&lt;TD&gt;64&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;10&lt;/TD&gt;&lt;TD&gt;North&lt;/TD&gt;&lt;TD&gt;3/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;100&lt;/TD&gt;&lt;TD&gt;5899&lt;/TD&gt;&lt;TD&gt;223&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;South&lt;/TD&gt;&lt;TD&gt;9/1/2018&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;93&lt;/TD&gt;&lt;TD&gt;578&lt;/TD&gt;&lt;TD&gt;11&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;South&lt;/TD&gt;&lt;TD&gt;2/1/2019&lt;/TD&gt;&lt;TD&gt;2019&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;TD&gt;235&lt;/TD&gt;&lt;TD&gt;176&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;I know the solution is likely obvious, but I can't figure it out. Thank you!&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Sep 2019 18:02:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586498#M14508</guid>
      <dc:creator>colabear</dc:creator>
      <dc:date>2019-09-05T18:02:25Z</dc:date>
    </item>
    <item>
      <title>Re: Distinct count and group by multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586512#M14516</link>
      <description>&lt;P&gt;You can try below code&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data welcome;
infile datalines dlm=",";
input ID $ DeptID $ Name :$20. svcdate :yymmdd10. Fiscal_Year Hours Amount ;
format svcdate yymmddd10.;
datalines;
1234, 7, East, 2019-01-01, 2019, 1.50, 109.7
1234, 7, East, 2019-01-01, 2019, 7.50, 509.7
6543, 7, East, 2019-01-1, 2019, 2.75, 340.82
7847, 8, West, 2019-02-01, 2019, 5, 7000
5818, 10, North, 2019-03-01, 2019, 6, 1032
0844, 3, South, 2019-04-01, 2019, 40, 855
4499, 5, West, 2018-09-01, 2019, 10, 790
1234, 7, East, 2019-09-01, 2019, 6.50, 1900.7
3000, 2, North, 2018-12-01, 2019, 11.75, 874.79
3785, 27, West, 2018-11-01, 2019, 20.3, 450.64

;
run;

proc sql;
create table want as select DeptID, Name, svcdate, Fiscal_Year , sum(Hours) as hours, sum(amount) as amount, count(id) as count 
from welcome group by DeptID, Name, svcdate, Fiscal_Year ;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 05 Sep 2019 18:33:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586512#M14516</guid>
      <dc:creator>Jagadishkatam</dc:creator>
      <dc:date>2019-09-05T18:33:31Z</dc:date>
    </item>
    <item>
      <title>Re: Distinct count and group by multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586523#M14520</link>
      <description>&lt;P&gt;Hello Jag,&lt;/P&gt;&lt;P&gt;Thank you for the quick response! The count I am getting is not a sum of distinct ID. I want to show how many individuals were seen at the department X during month XX-YYYY.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you!&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Sep 2019 18:46:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586523#M14520</guid>
      <dc:creator>colabear</dc:creator>
      <dc:date>2019-09-05T18:46:23Z</dc:date>
    </item>
    <item>
      <title>Re: Distinct count and group by multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586528#M14523</link>
      <description>&lt;P&gt;Sadly it isn't. There isn't a default function to do distinct counts in SAS so you need to calculate it externally and then merge it in with your other table.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There are two methods to do this that I recommend, PROC SQL or double PROC FREQ. Examples for both are below. To scale it for multiple variables, add your extra variables to the GROUP BY or TABLE statement.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*This demonstrates how to count the number of unique occurrences of a variable
across groups. It uses the SASHELP.CARS dataset which is available with any SAS installation.
The objective is to determine the number of unique car makers by origin.

Note: The SQL solution can be off if you have a large data set and these are not the only two ways to calculate distinct counts.
If you're dealing with a large data set other methods may be appropriate.*/

*Count distinct IDs;
proc sql;
create table distinct_sql as
select origin, count(distinct make) as n_make
from sashelp.cars
group by origin;
quit;

*Double PROC FREQ;
proc freq data=sashelp.cars noprint;
table origin * make / out=origin_make;
run;

proc freq data=origin_make noprint;
table origin / out= distinct_freq;
run;

title 'PROC FREQ';
proc print data=distinct_freq;
run;
title 'PROC SQL';
proc print data=distinct_sql;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/109965"&gt;@colabear&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;I am trying to generate a summary table, but I can't seem to think of how to execute it. What I need is to add a column with distinct count of ID by DeptID (or name) and by month of service. The 'hours' and 'amount' columns would need to be summed accordingly. I currently have a table but I would like it to contain distinct count of ID.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc means data = services_fortab noprint nway sum;&lt;BR /&gt;var hours paid;&lt;BR /&gt;class vendorid vendorname svc_month Fiscal_Year SvcLocDesc ;&lt;BR /&gt;output out = services_fortab2(drop = _TYPE_ _FREQ_) sum=;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;data welcome;&lt;BR /&gt;infile datalines dlm=",";&lt;BR /&gt;input ID $ DeptID $ Name :$20. svcdate :yymmdd10. Fiscal_Year Hours Amount ;&lt;BR /&gt;format svcdate yymmddd10.;&lt;BR /&gt;datalines;&lt;BR /&gt;1234, 7, East, 2019-01-01, 2019, 1.50, 109.7&lt;BR /&gt;1234, 7, East, 2019-01-01, 2019, 7.50, 509.7&lt;BR /&gt;6543, 7, East, 2019-01-1, 2019, 2.75, 340.82&lt;BR /&gt;7847, 8, West, 2019-02-01, 2019, 5, 7000&lt;BR /&gt;5818, 10, North, 2019-03-01, 2019, 6, 1032&lt;BR /&gt;0844, 3, South, 2019-04-01, 2019, 40, 855&lt;BR /&gt;4499, 5, West, 2018-09-01, 2019, 10, 790&lt;BR /&gt;1234, 7, East, 2019-09-01, 2019, 6.50, 1900.7&lt;BR /&gt;3000, 2, North, 2018-12-01, 2019, 11.75, 874.79&lt;BR /&gt;3785, 27, West, 2018-11-01, 2019, 20.3, 450.64&lt;/P&gt;
&lt;P&gt;;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I'd like the output to look something like this:&lt;/P&gt;
&lt;TABLE&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;DeptID&lt;/TD&gt;
&lt;TD&gt;Name&lt;/TD&gt;
&lt;TD&gt;svcdate&lt;/TD&gt;
&lt;TD&gt;Fiscal Year&lt;/TD&gt;
&lt;TD&gt;Hours&lt;/TD&gt;
&lt;TD&gt;Amount&lt;/TD&gt;
&lt;TD&gt;Count&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;7&lt;/TD&gt;
&lt;TD&gt;East&lt;/TD&gt;
&lt;TD&gt;1/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;10&lt;/TD&gt;
&lt;TD&gt;5324&lt;/TD&gt;
&lt;TD&gt;50&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;7&lt;/TD&gt;
&lt;TD&gt;East&lt;/TD&gt;
&lt;TD&gt;3/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;6&lt;/TD&gt;
&lt;TD&gt;756&lt;/TD&gt;
&lt;TD&gt;3&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;8&lt;/TD&gt;
&lt;TD&gt;West&lt;/TD&gt;
&lt;TD&gt;2/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;67&lt;/TD&gt;
&lt;TD&gt;2366&lt;/TD&gt;
&lt;TD&gt;34&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;8&lt;/TD&gt;
&lt;TD&gt;West&lt;/TD&gt;
&lt;TD&gt;4/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;98&lt;/TD&gt;
&lt;TD&gt;7899&lt;/TD&gt;
&lt;TD&gt;467&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;10&lt;/TD&gt;
&lt;TD&gt;North&lt;/TD&gt;
&lt;TD&gt;2/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;33&lt;/TD&gt;
&lt;TD&gt;956&lt;/TD&gt;
&lt;TD&gt;64&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;10&lt;/TD&gt;
&lt;TD&gt;North&lt;/TD&gt;
&lt;TD&gt;3/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;100&lt;/TD&gt;
&lt;TD&gt;5899&lt;/TD&gt;
&lt;TD&gt;223&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;3&lt;/TD&gt;
&lt;TD&gt;South&lt;/TD&gt;
&lt;TD&gt;9/1/2018&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;93&lt;/TD&gt;
&lt;TD&gt;578&lt;/TD&gt;
&lt;TD&gt;11&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;3&lt;/TD&gt;
&lt;TD&gt;South&lt;/TD&gt;
&lt;TD&gt;2/1/2019&lt;/TD&gt;
&lt;TD&gt;2019&lt;/TD&gt;
&lt;TD&gt;25&lt;/TD&gt;
&lt;TD&gt;235&lt;/TD&gt;
&lt;TD&gt;176&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;I know the solution is likely obvious, but I can't figure it out. Thank you!&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Sep 2019 18:52:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Distinct-count-and-group-by-multiple-variables/m-p/586528#M14523</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2019-09-05T18:52:06Z</dc:date>
    </item>
  </channel>
</rss>

