<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Count Unique Observations in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948792#M83726</link>
    <description>&lt;P&gt;Probably should say&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;tables unique_id/noprint;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Wed, 23 Oct 2024 17:32:57 GMT</pubDate>
    <dc:creator>PaigeMiller</dc:creator>
    <dc:date>2024-10-23T17:32:57Z</dc:date>
    <item>
      <title>Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948749#M83721</link>
      <description>&lt;P&gt;I have a very big data set of 500 million observations. When I use the complete data set and count the number of unique observations by different groups (e.g., age, race, sex, state) I get the following error: "unable to insert into temporary index while processing summary functions". When I use 10% of the data set my code runs without errors. Here is my code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as select age, count(distinct unique_id) as count 
from have 
group by age;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2024 14:22:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948749#M83721</guid>
      <dc:creator>trevand</dc:creator>
      <dc:date>2024-10-23T14:22:57Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948755#M83722</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=have;
    by age;
run;
proc freq data=have nlevels;
    ods output nlevels=nlevels;
    by age;
    tables name/noprint;
run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2024 14:38:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948755#M83722</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2024-10-23T14:38:38Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948757#M83723</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/10892"&gt;@PaigeMiller&lt;/a&gt;&amp;nbsp;do you know why proc sql doesn't work with the big data set?&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2024 14:42:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948757#M83723</guid>
      <dc:creator>trevand</dc:creator>
      <dc:date>2024-10-23T14:42:00Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948758#M83724</link>
      <description>&lt;P&gt;&lt;A href="https://communities.sas.com/t5/SAS-Communities-Library/Maxims-of-Maximally-Efficient-SAS-Programmers/ta-p/352068" target="_self"&gt;Maxim 10&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2024 14:43:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948758#M83724</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2024-10-23T14:43:40Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948791#M83725</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/10892"&gt;@PaigeMiller&lt;/a&gt; I don't understand how the code you are proposing gives just the number of unique observations as a new variable. I get an error that variable name cannot be found. &lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2024 17:30:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948791#M83725</guid>
      <dc:creator>trevand</dc:creator>
      <dc:date>2024-10-23T17:30:57Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948792#M83726</link>
      <description>&lt;P&gt;Probably should say&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;tables unique_id/noprint;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2024 17:32:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948792#M83726</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2024-10-23T17:32:57Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948797#M83727</link>
      <description>&lt;P&gt;Are you working on a PC, or big server?&amp;nbsp; 500M records is getting big for a PC, but probably feasible if you've got enough disk space and/or memory.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PROC SQL is probably running out of resources (disk space or memory).&amp;nbsp; PROC FREQ might as well.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Depending on your memory and your data, you could try a hash table approach.&amp;nbsp; If you make a hash table with age and unique_ID as keys, and just add your data to it, you'll get a table with all the distinct values (because hash tables de-dup by default), and you could output it to a dataset and then count it.&amp;nbsp; But if you have LOTS of unique_id's and not enough memory you could hit memory problems.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you're very patient and have the resources, you could try proc sort nodupkey out=... with the TAGSORT option, and see if that blows up.&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2024 17:52:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948797#M83727</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2024-10-23T17:52:52Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948799#M83728</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort
  data=have (keep=age unique_id)
  out=sorted
  nodupkey
;
by age unique_id;
run;

proc freq data=sorted;
tables age;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;As an alternative.&lt;/P&gt;
&lt;P&gt;Or run this step instead of PROC FREQ:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
set sorted;
by age;
if first.age
then count = 1;
else count + 1;
if last.age;
drop unique_id;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2024 17:59:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948799#M83728</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2024-10-23T17:59:33Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948849#M83729</link>
      <description>1) You could try to build an index before runing this SQL.&lt;BR /&gt;proc sql;&lt;BR /&gt;create index age on have;&lt;BR /&gt;create index unique_id on have;&lt;BR /&gt;quit;&lt;BR /&gt;&lt;BR /&gt;2)You could try to runing this code under BATCH mode to request more memory of PC.&lt;BR /&gt;"D:\SASHome\SASFoundation\9.4\sas.exe" -nosplash -sysin "c:\temp\temp.sas" -log "c:\temp\temp#Y#m#d-#H-#M-#s.log" -logparm "rollover=auto"  -memsize 90G&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;3) Split you big table into many small sub-tables and make a macro for this PROC SQL.&lt;BR /&gt;</description>
      <pubDate>Thu, 24 Oct 2024 02:26:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948849#M83729</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2024-10-24T02:26:48Z</dc:date>
    </item>
    <item>
      <title>Re: Count Unique Observations</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948881#M83731</link>
      <description>&lt;P&gt;Henderson &amp;amp; Dorfman have a great paper on using hash functions to chunk your data, and then process each chunk and aggregate:&lt;/P&gt;
&lt;P&gt;&lt;A href="https://communities.sas.com/t5/SAS-Global-Forum-Proceedings/Uniform-Hashing-of-Arbitrary-Input-Into-Key-Exclusive-Segments/ta-p/735358" target="_blank"&gt;https://communities.sas.com/t5/SAS-Global-Forum-Proceedings/Uniform-Hashing-of-Arbitrary-Input-Into-Key-Exclusive-Segments/ta-p/735358&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Even without hashing, you could try a simple chunk approach by writing a macro (or whatever code generation technique you like) to get the count once per age group:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
   create table want_18 as select age, count(distinct unique_id) as count 
   from have 
   where age=18;
quit;

proc sql;
   create table want_19 as select age, count(distinct unique_id) as count 
   from have 
   where age=19;
quit;

*etc;

data want;
  set want_: ;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Oct 2024 13:12:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Count-Unique-Observations/m-p/948881#M83731</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2024-10-24T13:12:58Z</dc:date>
    </item>
  </channel>
</rss>

