Solved: Return count of 0 in a Group By SQL Statement

MB_Analyst · Posted 03-01-2019 09:14 AM

I am trying to count observations with two grouping variables, with the output include counts of zero in groups. I've look at this question but having troubling expanding it with two grouping variables.

DATA test_dat;
    INPUT ID group $1. varx;
    DATALINES;
    1 a 63
    1 a 67
    1 b 81
    1 b 63
    2 b 11
    2 b 63
    3 a 53
    3 b 83
    ;
RUN;

I want my output to look like:

ID group count

1 a 2

1 b 2

2 a 0

2 b 2

3 a 1

3 b 1

novinosrin · Posted 03-01-2019 09:32 AM


DATA test_dat;
    INPUT ID group $1. varx;
    DATALINES;
    1 a 63
    1 a 67
    1 b 81
    1 b 63
    2 b 11
    2 b 63
    3 a 53
    3 b 83
    ;
RUN;

proc sql;
create table want as
select a.*,coalesce(count,0) as count
from 
((select * from (select distinct id from test_dat),(select distinct group from test_dat))) a
left join
(select id, group,count(*) as count from test_dat group by id,group) as  b
on a.id=b.id and a.group=b.group
order by a.id,b.group;
quit;

View solution in original post

novinosrin · Posted 03-01-2019 09:32 AM


DATA test_dat;
    INPUT ID group $1. varx;
    DATALINES;
    1 a 63
    1 a 67
    1 b 81
    1 b 63
    2 b 11
    2 b 63
    3 a 53
    3 b 83
    ;
RUN;

proc sql;
create table want as
select a.*,coalesce(count,0) as count
from 
((select * from (select distinct id from test_dat),(select distinct group from test_dat))) a
left join
(select id, group,count(*) as count from test_dat group by id,group) as  b
on a.id=b.id and a.group=b.group
order by a.id,b.group;
quit;

RW9 · Posted 03-01-2019 09:34 AM

There isn't a way for the procedure to know that data which doesn't exist in the dataset should be returned. The simplest method is to create a template of the data you want to see out, then merge that on:

data test_dat;
  input id group $1. varx;
datalines;
1 a 63
1 a 67
1 b 81
1 b 63
2 b 11
2 b 63
3 a 53
3 b 83
;
run;

data templ;
  do id=1 to 3;
    do group="a","b";
      output;
    end;
  end;
run;

proc sql;
  create table want as 
  select coalesce(a.id,b.id) as id,
         coalesce(a.group,b.group)  as group,
         coalesce(a.res,0) as result
  from   (select id,group,count(*) as res from test_dat group by id,group) a
  full join templ b
  on     a.id=b.id
  and    a.group=b.group;
quit;

Andygray · Posted 03-01-2019 09:38 AM

@RW9 You are hardcoding the datastep to get all groups as opposed to @novinosrin solution that cleverly chooses the distinct group*distinct id in the subquery making it fully dynamic.

Astounding · Posted 03-01-2019 09:41 AM

Only because I'm battling a horrible cold and it helps pass the time to address questions here ... here is an approach that might add to your arsenal of tools.

Begin by creating a shell of all possible combinations:

proc sql;
   create table all_ids as select distinct id from have;
   create table all_groups as select distinct group from have;
   create table complete_shell as select * from all_ids, all_groups orderby id, group;
quit;

Then get your counts and merge it in with the complete shell:

proc freq data=have;
   tables id * group / out=counts (keep=id group count) noprint;
run;

data want;
   merge counts complete_shell;
   by id group;
   if count=. then count=0;
run;

ballardw · Posted 03-01-2019 06:24 PM

And a solution using Proc Summary/means:

proc summary data=test_dat completetypes nway;
   class id group;
   output out=junk (drop=_type_ rename=(_freq_=count));
run;

I believe this would more easily scale to 3 or more variables than the SQL solution.

Ksharp · Posted 03-02-2019 06:05 AM

DATA test_dat;
    INPUT ID group $1. varx;
    DATALINES;
    1 a 63
    1 a 67
    1 b 81
    1 b 63
    2 b 11
    2 b 63
    3 a 53
    3 b 83
    ;
RUN;

proc freq data=test_dat noprint;
table id*group/out=want sparse ;
run;

proc print;run;

Ronein · Posted 03-06-2019 01:01 AM

DATA Rawdata;
    INPUT ID group $1. y;
    DATALINES;
    1 a 63
    1 a 67
    1 b 81
    1 b 63
    2 b 11
    2 b 63
    3 a 53
    3 b 83
    ;
RUN;


data Sheled;
do id=1,2,3;
do group="a","b";
output;
end;
end;
run;

PROC TABULATE DATA=Rawdata classdata=Sheled format=comma32.  out=want1;
    var y;
	CLASS ID /	ORDER=UNFORMATTED MISSING;
	CLASS group /	ORDER=UNFORMATTED MISSING;
	Table ID=''*group='' ALL={LABEL="Total"},
          Y=''*N='count'/box='ID,Group';
RUN;

Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Re: Return count of 0 in a Group By SQL Statement

Ready to join fellow brilliant minds for the SAS Hackathon?

Classroom Training Available!