BookmarkSubscribeRSS Feed
catkat96
Obsidian | Level 7

Hello, I have a dataset that looks like this:

 

data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1	15/02/2021	0	0	0	0
1	17/02/2021	0	0	0	0
1	19/02/2021	0	0	1	0
2	08/02/2021	0	0	0	1
2	09/02/2021	1	0	0	0
3	10/02/2021	0	0	0	0
3	11/02/2021	1	0	0	0
3	15/02/2021	1	0	0	0
4	01/02/2021	0	0	0	0
4	02/02/2021	0	0	0	0
4	03/02/2021	0	0	0	0
4	04/02/2021	0	1	0	0
4	05/02/2021	0	0	0	0
4	23/02/2021	0	1	0	0
5   10/02/2021  0   0   0   0
;
run;

I just grabbed a snippet of february's data but there is data for every month, and more data for each month. Basically, a client ID can be contacted many times a month and in each call they can be put in a category. 

My objective is to find out how many UNIQUE ID's entered each category each month. To do this, I need to follow a hierarchy: cat1>cat2>cat3>cat4>no category. So if a client appears in both category 1 and 3 in a month, they should only be counted for category 1, not 3. 

 

Here is how the output should look:

data Have;
input yearmonth :mmyys7. cat1 cat2 cat3 cat4 nocat;
format yearmonth mmyys7.;
datalines;
Feb-21	2	1	1	0	1
;
run;

As you can see:

-ID 1 was counted towards Cat 3

-ID 2 was counted towards Cat 1 only, not 4, because cat1>cat4

-ID 3 was counted towards Cat 1 once

-ID 4 was counted towards Cat 2 once

-ID 5 was counted towards nocat 

 

I believe this is not very complicated but what I've done so far has not been working:

proc sql;
create table test as
select count(distinct id) as unique_id
, month(calldate) as month
, count ( case when cat1=1 then 1 else 0 end) as cat1
from have
group by month;
quit;

proc sql;
create table test2 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat2=1 then 1 else 0 end) as cat2
from have
group by month;
quit;

proc sql;
create table test3 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat3=1 then 1 else 0 end) as cat3
from have
group by month;
quit;

proc sql;
create table test4 as
select count(distinct id) as unique_id
, month(calldate) as month
, count (distinct case when cat4=1 then 1 else 0 end) as cat4
from have
group by month;
quit;

I couldn't make it work with year-month either. 

If you could show me a way to solve it in PROC SQL it is very much appreciated, since the rest of my code is all in that format.

4 REPLIES 4
andreas_lds
Jade | Level 19

The complexity is caused by suboptimal data-design.

data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1  15/02/2021  0  0  0  0
1  17/02/2021  0  0  0  0
1  19/02/2021  0  0  1  0
2  08/02/2021  0  0  0  1
2  09/02/2021  1  0  0  0
3  10/02/2021  0  0  0  0
3  11/02/2021  1  0  0  0
3  15/02/2021  1  0  0  0
4  01/02/2021  0  0  0  0
4  02/02/2021  0  0  0  0
4  03/02/2021  0  0  0  0
4  04/02/2021  0  1  0  0
4  05/02/2021  0  0  0  0
4  23/02/2021  0  1  0  0
5   10/02/2021  0   0   0   0
;
run;

data HaveReduced;
   set have;

   Cat = whichn(1, of cat1-cat4, 1);
   
   drop cat1-cat4;
run;
   
proc summary data=HaveReduced nway;
   class CallDate Id;
   format CallDate mmyy7.;
   var Cat;
   output out=unique(drop=_:) min=;
run;

proc format;
   value Category 
      1 = 'Cat 1'
      2 = 'Cat 2'
      3 = 'Cat 3'
      4 = 'Cat 4'
      5 = 'No Cat'
   ;
run;
   
option missing='0';
proc report data=unique ;
   columns CallDate Cat;
   define CallDate / group 'yearmonth';
   define Cat / n across format= Category. preloadfmt;
run;
options missing='.';

 

 

catkat96
Obsidian | Level 7
That looks good, I have a question though, how could I do to get a dataset instead of a report table in the last step?
rudfaden
Pyrite | Level 9

Maybe something like this

 

data Have;
	input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
	format callDate ddmmyy10.;
	datalines;
1 15/02/2021 0 0 0 0
1 17/02/2021 0 0 0 0
1 19/02/2021 0 0 1 0
2 08/02/2021 0 0 0 1
2 09/02/2021 1 0 0 0
3 10/02/2021 0 0 0 0
3 11/02/2021 1 0 0 0
3 15/02/2021 1 0 0 0
4 01/02/2021 0 0 0 0
4 02/02/2021 0 0 0 0
4 03/02/2021 0 0 0 0
4 04/02/2021 0 1 0 0
4 05/02/2021 0 0 0 0
4 23/02/2021 0 1 0 0
5 10/02/2021 0 0 0 0
;
run;

proc sort data=have;
	by id cat1 cat2 cat3 cat4;
run;

data _temp;
	set have;
	by id cat1 cat2 cat3 cat4;

	if last.id then
		output;
run;

proc sql;
	select month(callDate) as calldateMonth
		,sum(cat1) as cat1
		,sum(cat2) as cat2
		,sum(cat3) as cat3
		,sum(cat4) as cat4
	from _temp
		group by calldateMonth
	;
quit;
Ksharp
Super User

SQL is not right tool for this scenario .

 

data Have;
input ID callDate :ddmmyy. cat1 cat2 cat3 cat4;
format callDate ddmmyy10.;
datalines;
1	15/02/2021	0	0	0	0
1	17/02/2021	0	0	0	0
1	19/02/2021	0	0	1	0
2	08/02/2021	0	0	0	1
2	09/02/2021	1	0	0	0
3	10/02/2021	0	0	0	0
3	11/02/2021	1	0	0	0
3	15/02/2021	1	0	0	0
4	01/02/2021	0	0	0	0
4	02/02/2021	0	0	0	0
4	03/02/2021	0	0	0	0
4	04/02/2021	0	1	0	0
4	05/02/2021	0	0	0	0
4	23/02/2021	0	1	0	0
5   10/02/2021  0   0   0   0
;
run;
proc sql;
create table temp as
select id,intnx('month',callDate,0) as callDate format monyy7.,
sum(cat1) as cat1,sum(cat2) as cat2,sum(cat3) as cat3,sum(cat4) as cat4
 from have
  group by id,calculated callDate;

create table temp2 as
select id,callDate,
case when cat1 ne 0 then 1 else 0 end as cat1,
case when cat2 ne 0 and cat1=0 then 1 else 0 end as cat2,
case when cat3 ne 0 and cat1=0 and cat2=0 then 1 else 0 end as cat3,
case when cat4 ne 0 and cat1=0 and cat2=0 and cat3=0 then 1 else 0 end as cat4
 from temp;

create table want as
select distinct callDate,
(select count(distinct id) from temp2 where callDate=a.callDate and cat1=1) as cat1,
(select count(distinct id) from temp2 where callDate=a.callDate and cat2=1) as cat2,
(select count(distinct id) from temp2 where callDate=a.callDate and cat3=1) as cat3,
(select count(distinct id) from temp2 where callDate=a.callDate and cat4=1) as cat4,
(select count(distinct id) from temp2 where callDate=a.callDate 
 and cat1=0 and cat2=0 and cat3=0 and cat4=0 ) as nocat

 from temp2 as a ;
quit;

 

 

sas-innovate-2024.png

Available on demand!

Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 4 replies
  • 545 views
  • 0 likes
  • 4 in conversation