<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Count distinct using case when in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645161#M192820</link>
    <description>&lt;P&gt;You could try:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc sql;
select
	count(distinct case when missing(comp_id) then upcase(compress(name," ,.-")) else put(comp_id, best.) end) as n
from test;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 05 May 2020 04:48:06 GMT</pubDate>
    <dc:creator>PGStats</dc:creator>
    <dc:date>2020-05-05T04:48:06Z</dc:date>
    <item>
      <title>Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644969#M192733</link>
      <description>&lt;P&gt;From my not-so-clean data I want to count as correctly as possible the number of unique companies in the data. Each record has a company identifier number and a company name, however, the identifier can be missing. The identifier is assumed to be the most correct, but when missing I need to resort to the company name. I have managed to get the sum using many steps, but I am wondering if it can be done in less steps using "case when" or something else?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Mock data:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
input record comp_ID Name $15.;
datalines;
1 101 SPORTSSHOP
2 101 SportsShop
3 101 Sports Shop
4 102 FoodShop
5 . ClothesShop
6 . CLOTHESSHOP 
7 . Electronics Co.
;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The correct count would be 4 (2 based on comp_ID and 2 based on name).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My short attempt gives the wrong count:&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table count as
select distinct 
(case when comp_ID is not missing then count(distinct(comp_ID))
when comp_ID is missing then count(distinct(upcase(Name))) end) as n_comp_temp
from test;
quit;

proc sql;
create table sum as
select sum(n_comp_temp) as n_comp
from count;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The long way that works fine, but can most likely be reduced to less steps:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table test2 as 
	select *,
	(case when comp_ID is not missing then 1
	when comp_ID is missing then 0 
	end) as ID_present
from test;
quit;

proc sql;
create table test3 as
select distinct ID_present
, count(distinct(comp_ID)) as n_IDs
, count(distinct(upcase(Name))) as n_names
from test2
group by ID_present;
quit;

proc sql;
create table n_IDs as
select n_IDs as n_comp
from test3
where ID_present = 1;
quit;

proc sql;
create table n_names as
select n_names as n_comp
from test3
where ID_present = 0;
quit;

data count;
set n_IDs n_names;
run;

proc sql;
create table count2 as
select sum(n_comp) as n_comp
from count;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 04 May 2020 12:48:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644969#M192733</guid>
      <dc:creator>SarahDew</dc:creator>
      <dc:date>2020-05-04T12:48:19Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644975#M192735</link>
      <description>&lt;P&gt;HI&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/244638"&gt;@SarahDew&lt;/a&gt;&amp;nbsp; Would this come close to meet your expectation?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
data test;
input record comp_ID Name $15.;
datalines;
1 101 SPORTSSHOP
2 101 SportsShop
3 101 Sports Shop
4 102 FoodShop
5 . ClothesShop
6 . CLOTHESSHOP 
7 . Electronics Co.
;
run;
/*List report*/
proc sql;
create table want_list as
select distinct comp_id,compress(upcase(name)) as name,ifc(missing(comp_ID),'name','comp_id') as based_on
from test
group by based_on;
quit;
/*Count report*/
proc sql;
create table want_count as
select ifc(missing(comp_ID),'name','comp_id') as based_on, count( distinct compress(upcase(name))) as count
from test
group by based_on;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 04 May 2020 13:11:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644975#M192735</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2020-05-04T13:11:53Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644992#M192737</link>
      <description>&lt;P&gt;You should nest the COUNT() and CASE in the other order.&lt;/P&gt;
&lt;P&gt;For example you could count distinct COMP_ID (which will ignore the missing values) and then count the distinct NAMEs only for records that have missing COMP_ID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;select
  count(distinct comp_id) as n_comp_ids
, count(distinct case when (missing(comp_id)) then name else ' ' end) as n_other_names
from test
;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 04 May 2020 13:34:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/644992#M192737</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2020-05-04T13:34:37Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645159#M192818</link>
      <description>&lt;P&gt;What if your sample data had an 8th record, with no ID, but a name matching another record that did have a ID, such as below:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
  input record comp_ID Name $15.;
datalines;
1 101 SPORTSSHOP
2 101 SportsShop
3 101 Sports Shop
4 102 FoodShop
5 . ClothesShop
6 . CLOTHESSHOP 
7 . Electronics Co.
8 . foodshop
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Then you can no longer do a simple count of distinct names for the records without id's.&amp;nbsp; You would have to reduce it be the number of distinct names &lt;STRIKE&gt;also&lt;/STRIKE&gt; NOT found in records with COMP_ID, as here:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want (keep=N_:);
  set test (where=(comp_id^=.))
      test (where=(comp_id=.))
    end=end_of_testdata;

  if _n_=1 then do;
     declare hash hid ();  /*count unique company id's */
       hid.definekey('comp_id');
       hid.definedata('comp_id');
       hid.definedone();
     declare hash hname(); /*track unique names for records having company id's*/
       hname.definekey('name');
       hname.definedata('name');
       hname.definedone();
     declare hash noid (); /*track unique names not found in above */
       noid.definekey('name');
       noid.definedata('name');
       noid.definedone();
  end;
  name=upcase(name);
  if comp_id^=. then do;
     hid.replace();
     hname.replace();
  end;
  else if hname.check()^=0 then noid.replace();
  if end_of_testdata ;
  n_compid=hid.num_items;
  n_other=noid.num_items;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2020 04:51:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645159#M192818</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2020-05-05T04:51:49Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645161#M192820</link>
      <description>&lt;P&gt;You could try:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc sql;
select
	count(distinct case when missing(comp_id) then upcase(compress(name," ,.-")) else put(comp_id, best.) end) as n
from test;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 05 May 2020 04:48:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645161#M192820</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2020-05-05T04:48:06Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645276#M192869</link>
      <description>&lt;P&gt;Thanks for pointing out this possible issue. I had a look at the data and indeed I can see some issues there too. I will look into it to see if I can further improve the count with your method.&lt;/P&gt;</description>
      <pubDate>Tue, 05 May 2020 12:48:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645276#M192869</guid>
      <dc:creator>SarahDew</dc:creator>
      <dc:date>2020-05-05T12:48:48Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct using case when</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645277#M192870</link>
      <description>This seems to do exactly as asked and in the most concise way. I like it. Thanks</description>
      <pubDate>Tue, 05 May 2020 12:50:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-using-case-when/m-p/645277#M192870</guid>
      <dc:creator>SarahDew</dc:creator>
      <dc:date>2020-05-05T12:50:43Z</dc:date>
    </item>
  </channel>
</rss>

