Solved: Create new count variable

madsgregers · Posted 04-01-2020 05:52 AM

Hi,

I'm having trouble creating a new variable.

Imagine this fictive dataset:

ID X Y Z

1 23 3 1

2 12 2 0

3 10 3 0

1 80 6 1

2 09 2 0

1 78 7 1

4 14 6 0

5 67 9 1

6 12 5 1

1 89 4 0

I want to create a new variable named 'count' that counts how many times the same ID number appears. Besides that, the new variable 'count' should only count those ID numbers with the variable Z = 1. So in this dataset the variable 'Count' would look like:

ID X Y Z COUNT

1 23 3 1 3

2 12 2 0 0

3 10 3 0 0

1 80 6 1 3

2 09 2 0 0

1 78 7 1 3

4 14 6 0 0

5 67 9 1 1

6 12 5 1 1

1 89 4 0 3

Thanks!

PeterClemmensen · Posted 04-01-2020 06:09 AM

data have;
input ID X Y Z;
datalines;
1 23 3 1
2 12 2 0
3 10 3 0
1 80 6 1
2 09 2 0
1 78 7 1
4 14 6 0
5 67 9 1
6 12 5 1
1 89 4 0
;

data want (drop = rc);
   if _N_ = 1 then do;
      declare hash h ();
      h.definekey ('ID');
      h.definedata ('ID', 'count');
      h.definedone();

      do until (lr);
         set have end=lr;
         if Z = 1 then do;
            if h.find () ne 0 then count = 1;
            else                   count + 1;
            h.replace ();
         end;
      end;
   end;

   set have;

   count = 0;
   rc = h.find();
run;

Result:

ID  X   Y  Z  count 
1   23  3  1  3 
2   12  2  0  0 
3   10  3  0  0 
1   80  6  1  3 
2   9   2  0  0 
1   78  7  1  3 
4   14  6  0  0 
5   67  9  1  1 
6   12  5  1  1 
1   89  4  0  3

The DATA to DATA Step Macro
Blog: SASnrd

View solution in original post

PeterClemmensen · Posted 04-01-2020 06:09 AM

data have;
input ID X Y Z;
datalines;
1 23 3 1
2 12 2 0
3 10 3 0
1 80 6 1
2 09 2 0
1 78 7 1
4 14 6 0
5 67 9 1
6 12 5 1
1 89 4 0
;

data want (drop = rc);
   if _N_ = 1 then do;
      declare hash h ();
      h.definekey ('ID');
      h.definedata ('ID', 'count');
      h.definedone();

      do until (lr);
         set have end=lr;
         if Z = 1 then do;
            if h.find () ne 0 then count = 1;
            else                   count + 1;
            h.replace ();
         end;
      end;
   end;

   set have;

   count = 0;
   rc = h.find();
run;

Result:

ID  X   Y  Z  count 
1   23  3  1  3 
2   12  2  0  0 
3   10  3  0  0 
1   80  6  1  3 
2   9   2  0  0 
1   78  7  1  3 
4   14  6  0  0 
5   67  9  1  1 
6   12  5  1  1 
1   89  4  0  3

The DATA to DATA Step Macro
Blog: SASnrd

Kurt_Bremser · Posted 04-01-2020 06:17 AM

Also using a hash, but with a preliminary step to create the sums:

data have;
input ID $ X Y Z;
datalines;
1     23     3     1
2     12     2     0
3     10     3     0
1     80     6     1
2     09     2     0
1     78     7     1
4     14     6     0
5     67     9     1
6     12     5     1
1     89     4     0
;

proc summary data=have (where=(z = 1));
class id;
output out=sum (drop=_type_ rename=(_freq_=count));
run;

data want;
set have;
if 0 then set sum;
if _n_ = 1 
then do;
  declare hash h (dataset:"sum");
  h.definekey("id");
  h.definedata("count");
  h.definedone();
end;
if h.find() ne 0 then count = 0;
run;

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

madsgregers · Posted 04-01-2020 06:18 AM

Thanks a lot to both of you!

Kurt_Bremser · Posted 04-01-2020 06:34 AM

Note that the rather complicated algorithms are only necessary to keep the original order. If that is not the case, a simple SQL will do it:

proc sql;
create table want as
select *, sum(case when z = 1 then 1 else 0 end) as count
from have
group by id;
quit;

And you can keep the original order if you create a column to order by first:

data have;
input ID $ X Y Z;
ord = _n_;
datalines;
1     23     3     1
2     12     2     0
3     10     3     0
1     80     6     1
2     09     2     0
1     78     7     1
4     14     6     0
5     67     9     1
6     12     5     1
1     89     4     0
;

proc sql;
create table want as
select id, x, y, z, sum(case when z = 1 then 1 else 0 end) as count
from have
group by id
order by ord;
quit;

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

Ksharp · Posted 04-01-2020 09:08 AM

data have;
input ID X Y Z;
datalines;
1 23 3 1
2 12 2 0
3 10 3 0
1 80 6 1
2 09 2 0
1 78 7 1
4 14 6 0
5 67 9 1
6 12 5 1
1 89 4 0
;
proc sql;
create table want as
select *,
coalesce((select count(*) from have where z=1 and id=a.id),0) as count
 from have as a;
quit;

Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Re: Create new count variable

Registration is open