<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: distinct count in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545754#M151036</link>
    <description>&lt;P&gt;I will create the data set going forward. I need these numbers in a table so I can update this somewhere else. I can't check log for this.&lt;/P&gt;&lt;P&gt;it will be updated in another table where it will say&lt;/P&gt;&lt;P&gt;Total Count=14&lt;/P&gt;&lt;P&gt;Level count=11&lt;/P&gt;</description>
    <pubDate>Mon, 25 Mar 2019 11:44:57 GMT</pubDate>
    <dc:creator>Srigyan</dc:creator>
    <dc:date>2019-03-25T11:44:57Z</dc:date>
    <item>
      <title>distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545276#M150814</link>
      <description>&lt;P&gt;It's more about optimization question. I have a table with 1b record. Now I want to check if the total record is matched with distinct count based on 3 different variables. Though I have a query which runs great on small data, but takes lots of time when I run this on huge data like 1b record.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;i.e.&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;product&lt;/TD&gt;&lt;TD&gt;brand&lt;/TD&gt;&lt;TD&gt;date&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;b1&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;b3&lt;/TD&gt;&lt;TD&gt;03/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;b3&lt;/TD&gt;&lt;TD&gt;03/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;b4&lt;/TD&gt;&lt;TD&gt;04/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;b1&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;b2&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;b3&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;b4&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;b1&lt;/TD&gt;&lt;TD&gt;01/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;b1&lt;/TD&gt;&lt;TD&gt;02/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;b3&lt;/TD&gt;&lt;TD&gt;03/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;b3&lt;/TD&gt;&lt;TD&gt;03/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;b4&lt;/TD&gt;&lt;TD&gt;03/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;b4&lt;/TD&gt;&lt;TD&gt;04/10/2013&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The query is;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Proc SQL;&lt;/P&gt;&lt;P&gt;Select Count(distinct(catx(product,brand,date))),count(*)&amp;nbsp; from table1;&lt;/P&gt;&lt;P&gt;quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am using catx becuase if there any space in any of these 3 column, that will be removed.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Above query will give the output 11 &amp;amp; 14.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Is there any way to optimise this query. so I can run on huge data&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 11:31:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545276#M150814</guid>
      <dc:creator>Srigyan</dc:creator>
      <dc:date>2019-03-25T11:31:17Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545277#M150815</link>
      <description>&lt;P&gt;"&lt;SPAN&gt;I have a table with &lt;STRONG&gt;1b&lt;/STRONG&gt; record"&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&lt;FONT color="#FF0000"&gt;1billion????&lt;/FONT&gt;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000000"&gt;&lt;STRONG&gt;and &lt;FONT color="#FF0000"&gt;proc sql?&lt;/FONT&gt;&lt;/STRONG&gt;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000000"&gt;&lt;STRONG&gt;&lt;FONT color="#FF0000"&gt;Blimey!!!!!!!&amp;nbsp;&lt;/FONT&gt;&lt;/STRONG&gt;&lt;/FONT&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 22 Mar 2019 16:14:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545277#M150815</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2019-03-22T16:14:43Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545288#M150822</link>
      <description>&lt;P&gt;One simple rule: if datasets are large, don't use proc sql at all.&lt;/P&gt;</description>
      <pubDate>Fri, 22 Mar 2019 16:36:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545288#M150822</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2019-03-22T16:36:15Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545298#M150826</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/15475"&gt;@andreas_lds&lt;/a&gt;&amp;nbsp; &amp;nbsp;Bingo!&amp;nbsp; I like that!&lt;/P&gt;</description>
      <pubDate>Fri, 22 Mar 2019 16:53:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545298#M150826</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2019-03-22T16:53:25Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545317#M150833</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/200312"&gt;@Srigyan&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;It's more of optimisation question. I have a table with 1b record. Now I want to check if the total record is matching with distinct count based on 3 diferent variable. Though I have a query which runs great on small data but takes lot of time when I run this on huge data like 1b record.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Query is;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc sql;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#ff0000"&gt;Select Count(distinct(catx(col1,col2,col3))),count(*)&amp;nbsp; from table1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;quit;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;is there any way to optimise this query. so I can run on huge data&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Can you provide a small, maybe 15 records or so example of the data this "works" on correctly?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am not sure I understand exactly what you are looking for but when I test on a set I have this does not give a count of the distinct combinations of variables.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;proc sql;
    create table work.sqlcount2 as
    Select Count(distinct(catx(sex,age))) as distinctcount,count(*) as recordcount
    from sashelp.class;
quit;&lt;/PRE&gt;
&lt;P&gt;Shows 6 for the distinctcount but there are actually 11 different combinations of sex and age in the data set.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 22 Mar 2019 17:16:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545317#M150833</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2019-03-22T17:16:20Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545342#M150840</link>
      <description>&lt;P&gt;Use a procedure whose whole existence is to produce frequencies.&amp;nbsp; Because PROC SQL is a much more general purpose procedure, it likely has time-consuming overhead that you don't need.&amp;nbsp;&amp;nbsp; Because you are counting CATX(COL1,COL2,COL3) you apparently want 3-ways crosstabulations:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc freq data=tables noprint;&lt;BR /&gt;&amp;nbsp; tables col1*col2*col3 / out=freqs missing;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;To see what such a table would look like, run&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc freq data=sashelp.cars noprint;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; tables model*type*origin / out=freqs missing;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There will be one row per combination (including combinations in which 1 or more variable is missing) with a the values of MODEL TYPE ORIGIN, and new variables COUNT and PERCENT.&lt;/P&gt;</description>
      <pubDate>Fri, 22 Mar 2019 18:21:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545342#M150840</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2019-03-22T18:21:59Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545385#M150859</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/31461"&gt;@mkeintz&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;Use a procedure whose whole existence is to produce frequencies.&amp;nbsp; Because PROC SQL is a much more general purpose procedure, it likely has time-consuming overhead that you don't need.&amp;nbsp;&amp;nbsp; Because you are counting CATX(COL1,COL2,COL3) you apparently want 3-ways crosstabulations:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc freq data=tables noprint;&lt;BR /&gt;&amp;nbsp; tables col1*col2*col3 / out=freqs missing;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;To see what such a table would look like, run&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc freq data=sashelp.cars noprint;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; tables model*type*origin / out=freqs missing;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There will be one row per combination (including combinations in which 1 or more variable is missing) with a the values of MODEL TYPE ORIGIN, and new variables COUNT and PERCENT.&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/31461"&gt;@mkeintz&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Pretty much what I did only I used SASHELP.CLASS as a smaller data set on only two variables. I asked what &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/200312"&gt;@Srigyan&lt;/a&gt;&amp;nbsp;was actually attempting to count because his SQL returned a count of six values for the distinct where there are 11 combinations, so his code seems to be returning a count of the variable with the single largest number of distinct values.&lt;/P&gt;
&lt;PRE&gt;proc sql;
   select distinct(catx(sex,age)) as sexage
   from sashelp.class;
quit;&lt;/PRE&gt;
&lt;P&gt;Yields:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;sexage

11
12
13
14
15
16
&lt;/PRE&gt;</description>
      <pubDate>Fri, 22 Mar 2019 21:35:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545385#M150859</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2019-03-22T21:35:38Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545405#M150871</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/200312"&gt;@Srigyan&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The moment you hit large data volumes the coding approach chosen can make a big difference in performance. Knowing your data will help you a lot in making "the right" choice.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;For your use case:&lt;/P&gt;
&lt;P&gt;1. Is this a table in a database (which one) or is it a SAS table?&lt;/P&gt;
&lt;P&gt;2. If a SAS table: Is the table already sorted by col1-col3?&lt;/P&gt;
&lt;P&gt;3. What number of distinct combinations of col1-col3 do you expect?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;A SQL DISTINCT requires implicit sorting of the data. That's the costly step. I would assume also Proc Freq requires implicit sorting.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;IF the expected number of distinct combinations of col1-col3 is much lower than 1 billion then using a data step hash approach would likely perform much better as it avoids sorting 1billion rows. But that's only going to work if the distinct combinations of col1-col3 fit into memory.&lt;/P&gt;</description>
      <pubDate>Sat, 23 Mar 2019 01:09:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545405#M150871</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2019-03-23T01:09:55Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545428#M150885</link>
      <description>&lt;P&gt;Why are you using CATX()?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;That is for making delimited lists.&amp;nbsp; Because it trims and skips missing values you can have different values of the inputs result in the same output.&amp;nbsp; For example compare when COL2=. and COL3=10&amp;nbsp; with COL2=10 and COL3=. when using CATX('-',col2,col3) both will result with '10' as the result.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also if you are using CATX() it then why are you use COL1 as the delimiter instead of a constant string, like '/' or '-'?&lt;/P&gt;</description>
      <pubDate>Sat, 23 Mar 2019 14:59:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545428#M150885</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-03-23T14:59:12Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545429#M150886</link>
      <description>&lt;P&gt;If you use CATX(sex,age) you are just going to get the same values are if you did CATS(age).&lt;/P&gt;
&lt;P&gt;The first argument to CATX() is the delimiter string.&amp;nbsp; If there is only one other argument then the delimiter is never inserted.&lt;/P&gt;</description>
      <pubDate>Sat, 23 Mar 2019 04:41:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545429#M150886</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-03-23T04:41:51Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545457#M150902</link>
      <description>&lt;P&gt;Checking for duplicates always involves sorting, and doing it implicitly with proc sql will often be the least efficient way.&lt;/P&gt;
&lt;P&gt;Try this instead:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort
  data=have (keep=col1 col2 col3)
  out=test
  nodupkey
;
by col1 col2 col3;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;and look at the log. It will tell you how many duplicates were deleted.&lt;/P&gt;</description>
      <pubDate>Sat, 23 Mar 2019 08:39:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545457#M150902</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-03-23T08:39:40Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545462#M150905</link>
      <description>&lt;P&gt;If your data set is already sorted by ANY of the three variables, that should help PROC FREQ run faster.&amp;nbsp; For example, if your data set is sorted by COL2, you could use:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc freq data=have noprint;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp;by col2;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp;tables col1*col3 / out=counts;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The number of observations in COUNTS will be the right total, and PROC FREQ can take advantage of the sorted order.&lt;/P&gt;</description>
      <pubDate>Sat, 23 Mar 2019 10:51:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545462#M150905</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2019-03-23T10:51:49Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545624#M150969</link>
      <description>&lt;P&gt;A point I overlooked.&amp;nbsp;&amp;nbsp; You are using&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp;count(distinct(catx(col1,col2,col3)))&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It would probably be better practice to use&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; count(distinct(catx(' ',col1,col2,col3)))&lt;/P&gt;
&lt;P&gt;It would certainly be better once you attempt to go beyond 3 variables.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;And finally, if you only want the NUMBER of 3-way combinations, and not the DISTRIBUTION, then a hash object allows you to write out the count to the log in one step, as in:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _null_;
  if 0 then set sashelp.cars (keep=model type origin);
  declare hash h (dataset:'sashelp.cars (keep=model type origin)');
   h.definekey(all:'Y');
   h.definedone();
 n_combos=h.num_items;
 put n_combos=;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 24 Mar 2019 20:02:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545624#M150969</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2019-03-24T20:02:40Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545747#M151031</link>
      <description>&lt;P&gt;edit my question with an additional example, please check and share your answer. Thanks for your effort.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 11:32:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545747#M151031</guid>
      <dc:creator>Srigyan</dc:creator>
      <dc:date>2019-03-25T11:32:17Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545748#M151032</link>
      <description>&lt;P&gt;edited my question with an additional example, please check and share your answer. Thanks for your effort.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 11:32:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545748#M151032</guid>
      <dc:creator>Srigyan</dc:creator>
      <dc:date>2019-03-25T11:32:42Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545752#M151034</link>
      <description>&lt;P&gt;Please make it easier for use to run tests by providing example data in a readily usable form, see my below example:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile datalines dlm='09'x;
input product brand $ date :mmddyy10.;
format date mmddyy10.;
datalines;
1	b1	01/10/2013
1	b3	03/10/2013
1	b3	03/10/2013
1	b4	04/10/2013
2	b1	01/10/2013
2	b2	01/10/2013
2	b3	01/10/2013
2	b4	01/10/2013
1	b1	01/10/2013
4	b1	02/10/2013
5	b3	03/10/2013
5	b3	03/10/2013
6	b4	03/10/2013
6	b4	04/10/2013
;
run;

proc sort
  data=have
  out=test
  dupout=duplicates
  nodupkey
;
by product brand date;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The log from that:&lt;/P&gt;
&lt;PRE&gt;27         data have;
28         infile datalines dlm='09'x;
29         input product brand $ date :mmddyy10.;
30         format date mmddyy10.;
31         datalines;

NOTE: The data set WORK.HAVE has 14 observations and 3 variables.
NOTE: DATA statement used (Total process time):
      real time           0.02 seconds
      cpu time            0.00 seconds
      
46         ;

47         run;
48         
49         proc sort
50           data=have
51           out=test
52           dupout=duplicates
53           nodupkey
54         ;
55         by product brand date;
56         run;

NOTE: There were 14 observations read from the data set WORK.HAVE.
NOTE: 3 observations with duplicate key values were deleted.
NOTE: The data set WORK.TEST has 11 observations and 3 variables.
NOTE: The data set WORK.DUPLICATES has 3 observations and 3 variables.&lt;/PRE&gt;</description>
      <pubDate>Mon, 25 Mar 2019 11:42:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545752#M151034</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-03-25T11:42:09Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545754#M151036</link>
      <description>&lt;P&gt;I will create the data set going forward. I need these numbers in a table so I can update this somewhere else. I can't check log for this.&lt;/P&gt;&lt;P&gt;it will be updated in another table where it will say&lt;/P&gt;&lt;P&gt;Total Count=14&lt;/P&gt;&lt;P&gt;Level count=11&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 11:44:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545754#M151036</guid>
      <dc:creator>Srigyan</dc:creator>
      <dc:date>2019-03-25T11:44:57Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545778#M151049</link>
      <description>&lt;P&gt;You could just use a sub-query in PROC SQL.&lt;/P&gt;
&lt;P&gt;Into a table:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
create table want as 
select sum(nobs) as Total_count,count(*) as Level_count
  from
  (select col1,col2,col3,count(*) as nobs
   from have 
   group by col1,col2,col3
  )
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Or into macro variables:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
select sum(nobs) format=32.,count(*) format=32.
  into :total_count trimmed, :level_count trimmed
  from
  (select col1,col2,col3,count(*) as nobs
   from have 
   group by col1,col2,col3
  )
;
quit;
%put Total Count = %sysfunc(putn(&amp;amp;total_count,comma32.-L));
%put Level Count = %sysfunc(putn(&amp;amp;level_count,comma32.-L));&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 12:35:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545778#M151049</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-03-25T12:35:07Z</dc:date>
    </item>
    <item>
      <title>Re: distinct count</title>
      <link>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545796#M151060</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/200312"&gt;@Srigyan&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;I will create the data set going forward. I need these numbers in a table so I can update this somewhere else. I can't check log for this.&lt;/P&gt;
&lt;P&gt;it will be updated in another table where it will say&lt;/P&gt;
&lt;P&gt;Total Count=14&lt;/P&gt;
&lt;P&gt;Level count=11&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;You can pull the resulting sums from the output of the proc sort from dictionary.tables:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
select nobs into :distobs from dictionary.tables where libname = 'WORK' and memname = 'TEST';
select nobs into :dupobs from dictionary.tables where libname = 'WORK' and memname = 'DUPLICATES';
select nobs into :allobs from dictionary.tables where libname = 'WORK' and memname = 'HAVE';
quit;

%put allobs=&amp;amp;allobs. distobs=&amp;amp;distobs. dupobs=&amp;amp;dupobs.;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;or create a table&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as
select "Total Count" as description, nobs as count
from dictionary.tables
where libname = 'WORK' and memname = 'HAVE'
union all
select "Level Count" as description, nobs as count
from dictionary.tables
where libname = 'WORK' and memname = 'TEST'
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can now compare the performance of the suggested duplicate-detection methods and select which one to use.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2019 13:23:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/distinct-count/m-p/545796#M151060</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2019-03-25T13:23:25Z</dc:date>
    </item>
  </channel>
</rss>

