BookmarkSubscribeRSS Feed
Quartz | Level 8

## How to create a new count variable based off min value of another variable by ID

Dear all,

I have a data set with date difference values (datediff) by ID variable. Some IDs have no datediff and for rest of them with values for datediff, I want to create a new categorical/ count variable (COUNT) picking up the min value of datediff (as 1) and rest of the values as 0. For those with missing datefdiff, I want the new variable (COUNT) as missing. If by any chance if an ID has two of the same minimum datediff value, I just want to count it only once. Therefore COUNT should be 0 for the second observed same minimum datediff value.

This is what I want:

ID    datediff    Count

1        0              1

1        3              0

1        4              0

1        .               .

2        0              1

2        0              0

2       .                 .

3         6             1

3         7             0

4         .              .

5       0             1

5       1              0

5       2              0

6       13             1

6        16           0

Thank you!

7 REPLIES 7
Meteorite | Level 14

## Re: How to create a new count variable based off min value of another variable by ID

Hi,

Simply sorting the data by descending and giving a flag to the first record.

``````data have;
input ID datediff;
datalines;
1 0
1 3
1 4
1 .
2 0
2 0
2 .
3 6
3 7
4 .
5 0
5 10
5 20
6 13
6 16
;
run;

proc sort data=have(where=(datediff ^=.)) out=have_s;
by id datediff;
run;

data have_f;
set have;
by id ;
if first.id then count=1;
else count=0;
run;

data want;
set have_f have(where=(datediff=.));
run;

proc sort data=want;
by id ;
run;``````
Thanks,
Suryakiran
Quartz | Level 8

## Re: How to create a new count variable based off min value of another variable by ID

I think I may have found an easier way...not sure if this gives the right results

``````proc sort data=have;
by id datediff;
run;

data have_f;
set have;
by id ;
if first.id and datediff ne . then count=1;
else count=0;
run;``````
Tourmaline | Level 20

## Re: How to create a new count variable based off min value of another variable by ID

``````data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;
data want;
do until(last.id);
set have;
by id;
if datediff>. then _k=min(datediff,_k);
end;
do until(last.id);
set have;
by id;
count=ifn(datediff ne .,0,datediff);
if datediff>. and _k=datediff and not _f then do;
_f=1;
count=1;
end;
output;
end;
drop _:;
run;``````
PROC Star

## Re: How to create a new count variable based off min value of another variable by ID

This ought to work, but it's trickier than it looks (especially if you want to keep all of the observations from  your original data set, in their original order):

proc summary data=have;

by id;

var datediff;

output out=minvals (keep=id datediff) min=;

run;

data want;

set minvals have;

by id;

retain captured_min;

count = 0;

if first.id then do;

captured_min = datediff;

delete;

end;

else do;

if datediff=. then count=.;

else if datediff = captured_min then do;

count=1;

captured_min = .;

end;

end;

drop captured_min;

run;

Tourmaline | Level 20

## Re: How to create a new count variable based off min value of another variable by ID

``````data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

proc sort data=have out=_have;
by id  datediff;
run;
data want;
set _have;
by id;
retain f;
if not (first.id and last.id) then count=0;
if first.id then f=.;
if not missing(datediff)and not f then do;
count=1;
f=1;
drop f;
end;
run;``````
Tourmaline | Level 20

## Re: How to create a new count variable based off min value of another variable by ID

``````
data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

proc sql;
create table temp(drop=t)  as
select id ,datediff, (min(datediff)=datediff) as t,ifn(calculated t and datediff=., .,calculated t) as count
from have
group by id
order by id,datediff;
quit;

data want;
do _n_=1 by 1 until(last.datediff);
set temp;
by id datediff;
if _n_>1 then count=0;
output;
end;
run;

``````
Tourmaline | Level 20

## Re: How to create a new count variable based off min value of another variable by ID

``````
data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;
data _null_;
if _n_=1 then do;
dcl hash H (ordered: "A",multidata:'y') ;
h.definekey  ("ID","datediff") ;
h.definedata ("ID","datediff","count") ;
h.definedone () ;
end;
do until(last.id);
set have end=lr;
by id;
if datediff>. then count=0;
rc=h.add();
if not missing(datediff) then _k=min(_k,datediff);
end;
if _k>.;
h.find(key:id,key:_k);
count=1;
RC = H.replacedup() ;
if lr;
h.output(dataset:'want');
run;``````
Discussion stats
• 7 replies
• 690 views
• 2 likes
• 4 in conversation