<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Count distinct with a hierarchy in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726244#M225671</link>
    <description>That looks good, I have a question though, how could I do to get a dataset instead of a report table in the last step?</description>
    <pubDate>Mon, 15 Mar 2021 12:05:34 GMT</pubDate>
    <dc:creator>catkat96</dc:creator>
    <dc:date>2021-03-15T12:05:34Z</dc:date>
    <item>
      <title>Count distinct with a hierarchy</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726235#M225667</link>
      <description>&lt;P&gt;Hello, I have a dataset that looks like this:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1	15/02/2021	0	0	0	0
1	17/02/2021	0	0	0	0
1	19/02/2021	0	0	1	0
2	08/02/2021	0	0	0	1
2	09/02/2021	1	0	0	0
3	10/02/2021	0	0	0	0
3	11/02/2021	1	0	0	0
3	15/02/2021	1	0	0	0
4	01/02/2021	0	0	0	0
4	02/02/2021	0	0	0	0
4	03/02/2021	0	0	0	0
4	04/02/2021	0	1	0	0
4	05/02/2021	0	0	0	0
4	23/02/2021	0	1	0	0
5   10/02/2021  0   0   0   0
;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I just grabbed a snippet of february's data but there is data for every month, and more data for each month. Basically, a client ID can be contacted many times a month and in each call they can be put in a category.&amp;nbsp;&lt;/P&gt;&lt;P&gt;My objective is to find out how many UNIQUE ID's entered each category each month. To do this, I need to follow a hierarchy: cat1&amp;gt;cat2&amp;gt;cat3&amp;gt;cat4&amp;gt;no category. So if a client appears in both category 1 and 3 in a month, they should only be counted for category 1, not 3.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is how the output should look:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
input yearmonth :mmyys7. cat1 cat2 cat3 cat4 nocat;
format yearmonth mmyys7.;
datalines;
Feb-21	2	1	1	0	1
;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;As you can see:&lt;/P&gt;&lt;P&gt;-ID 1 was counted towards Cat 3&lt;/P&gt;&lt;P&gt;-ID 2 was counted towards Cat 1 only, not 4, because cat1&amp;gt;cat4&lt;/P&gt;&lt;P&gt;-ID 3 was counted towards Cat 1 once&lt;/P&gt;&lt;P&gt;-ID 4 was counted towards Cat 2 once&lt;/P&gt;&lt;P&gt;-ID 5 was counted towards nocat&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I believe this is not very complicated but what I've done so far has not been working:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table test as
select count(distinct id) as unique_id
, month(calldate) as month
, count ( case when cat1=1 then 1 else 0 end) as cat1
from have
group by month;
quit;

proc sql;
create table test2 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat2=1 then 1 else 0 end) as cat2
from have
group by month;
quit;

proc sql;
create table test3 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat3=1 then 1 else 0 end) as cat3
from have
group by month;
quit;

proc sql;
create table test4 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat4=1 then 1 else 0 end) as cat4
from have
group by month;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I couldn't make it work with year-month either.&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you could show me a way to solve it in PROC SQL it is very much appreciated, since the rest of my code is all in that format.&lt;/P&gt;</description>
      <pubDate>Mon, 15 Mar 2021 11:14:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726235#M225667</guid>
      <dc:creator>catkat96</dc:creator>
      <dc:date>2021-03-15T11:14:31Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct with a hierarchy</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726241#M225670</link>
      <description>&lt;P&gt;The complexity is caused by suboptimal data-design.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1  15/02/2021  0  0  0  0
1  17/02/2021  0  0  0  0
1  19/02/2021  0  0  1  0
2  08/02/2021  0  0  0  1
2  09/02/2021  1  0  0  0
3  10/02/2021  0  0  0  0
3  11/02/2021  1  0  0  0
3  15/02/2021  1  0  0  0
4  01/02/2021  0  0  0  0
4  02/02/2021  0  0  0  0
4  03/02/2021  0  0  0  0
4  04/02/2021  0  1  0  0
4  05/02/2021  0  0  0  0
4  23/02/2021  0  1  0  0
5   10/02/2021  0   0   0   0
;
run;

data HaveReduced;
   set have;

   Cat = whichn(1, of cat1-cat4, 1);
   
   drop cat1-cat4;
run;
   
proc summary data=HaveReduced nway;
   class CallDate Id;
   format CallDate mmyy7.;
   var Cat;
   output out=unique(drop=_:) min=;
run;

proc format;
   value Category 
      1 = 'Cat 1'
      2 = 'Cat 2'
      3 = 'Cat 3'
      4 = 'Cat 4'
      5 = 'No Cat'
   ;
run;
   
option missing='0';
proc report data=unique ;
   columns CallDate Cat;
   define CallDate / group 'yearmonth';
   define Cat / n across format= Category. preloadfmt;
run;
options missing='.';
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 15 Mar 2021 11:53:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726241#M225670</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2021-03-15T11:53:11Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct with a hierarchy</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726244#M225671</link>
      <description>That looks good, I have a question though, how could I do to get a dataset instead of a report table in the last step?</description>
      <pubDate>Mon, 15 Mar 2021 12:05:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726244#M225671</guid>
      <dc:creator>catkat96</dc:creator>
      <dc:date>2021-03-15T12:05:34Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct with a hierarchy</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726245#M225672</link>
      <description>&lt;P&gt;Maybe something like this&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
	input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
	format callDate ddmmyy10.;
	datalines;
1 15/02/2021 0 0 0 0
1 17/02/2021 0 0 0 0
1 19/02/2021 0 0 1 0
2 08/02/2021 0 0 0 1
2 09/02/2021 1 0 0 0
3 10/02/2021 0 0 0 0
3 11/02/2021 1 0 0 0
3 15/02/2021 1 0 0 0
4 01/02/2021 0 0 0 0
4 02/02/2021 0 0 0 0
4 03/02/2021 0 0 0 0
4 04/02/2021 0 1 0 0
4 05/02/2021 0 0 0 0
4 23/02/2021 0 1 0 0
5 10/02/2021 0 0 0 0
;
run;

proc sort data=have;
	by id cat1 cat2 cat3 cat4;
run;

data _temp;
	set have;
	by id cat1 cat2 cat3 cat4;

	if last.id then
		output;
run;

proc sql;
	select month(callDate) as calldateMonth
		,sum(cat1) as cat1
		,sum(cat2) as cat2
		,sum(cat3) as cat3
		,sum(cat4) as cat4
	from _temp
		group by calldateMonth
	;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 15 Mar 2021 12:17:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726245#M225672</guid>
      <dc:creator>rudfaden</dc:creator>
      <dc:date>2021-03-15T12:17:08Z</dc:date>
    </item>
    <item>
      <title>Re: Count distinct with a hierarchy</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726250#M225675</link>
      <description>&lt;P&gt;SQL is not right tool for this scenario .&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1	15/02/2021	0	0	0	0
1	17/02/2021	0	0	0	0
1	19/02/2021	0	0	1	0
2	08/02/2021	0	0	0	1
2	09/02/2021	1	0	0	0
3	10/02/2021	0	0	0	0
3	11/02/2021	1	0	0	0
3	15/02/2021	1	0	0	0
4	01/02/2021	0	0	0	0
4	02/02/2021	0	0	0	0
4	03/02/2021	0	0	0	0
4	04/02/2021	0	1	0	0
4	05/02/2021	0	0	0	0
4	23/02/2021	0	1	0	0
5   10/02/2021  0   0   0   0
;
run;
proc sql;
create table temp as
select id,intnx('month',callDate,0) as callDate format monyy7.,
sum(cat1) as cat1,sum(cat2) as cat2,sum(cat3) as cat3,sum(cat4) as cat4
 from have
  group by id,calculated callDate;

create table temp2 as
select id,callDate,
case when cat1 ne 0 then 1 else 0 end as cat1,
case when cat2 ne 0 and cat1=0 then 1 else 0 end as cat2,
case when cat3 ne 0 and cat1=0 and cat2=0 then 1 else 0 end as cat3,
case when cat4 ne 0 and cat1=0 and cat2=0 and cat3=0 then 1 else 0 end as cat4
 from temp;

create table want as
select distinct callDate,
(select count(distinct id) from temp2 where callDate=a.callDate and cat1=1) as cat1,
(select count(distinct id) from temp2 where callDate=a.callDate and cat2=1) as cat2,
(select count(distinct id) from temp2 where callDate=a.callDate and cat3=1) as cat3,
(select count(distinct id) from temp2 where callDate=a.callDate and cat4=1) as cat4,
(select count(distinct id) from temp2 where callDate=a.callDate 
 and cat1=0 and cat2=0 and cat3=0 and cat4=0 ) as nocat

 from temp2 as a ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 15 Mar 2021 12:49:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Count-distinct-with-a-hierarchy/m-p/726250#M225675</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2021-03-15T12:49:15Z</dc:date>
    </item>
  </channel>
</rss>

