Re: How to create a new count variable based off min value of another ...

sms1891 · Posted 12-03-2018 03:57 PM

Dear all,

I have a data set with date difference values (datediff) by ID variable. Some IDs have no datediff; for the rest, which do have datediff values, I want to create a new categorical/count variable (COUNT) that is 1 for the minimum value of datediff and 0 for the remaining values. For observations with a missing datediff, I want the new variable (COUNT) to be missing. If an ID happens to have the same minimum datediff value twice, I want to count it only once, so COUNT should be 0 for the second observation with that minimum datediff value.

This is what I want:

ID datediff Count

1 0 1

1 3 0

1 4 0

1 . .

2 0 1

2 0 0

2 . .

3 6 1

3 7 0

4 . .

5 0 1

5 1 0

5 2 0

6 13 1

6 16 0

Thank you!

SuryaKiran · Posted 12-03-2018 04:18 PM

Hi,

Simply sort the non-missing data by ID and datediff (ascending) and flag the first record of each ID.

/* Sample data: one row per ID/datediff pair; "." is a missing datediff. */
data have;
input ID datediff;
datalines;
1 0
1 3
1 4
1 .
2 0
2 0
2 .
3 6
3 7
4 .
5 0
5 10
5 20
6 13
6 16
;
run;

/* Keep only the rows that have a datediff and order them so that the
   smallest datediff is the first row of each ID. */
proc sort data=have(where=(datediff ^=.)) out=have_s;
by id datediff;
run;

/* Flag the first (minimum) row of each ID with count=1, all others with 0.
   This step must read the sorted, non-missing copy HAVE_S: reading HAVE
   would flag whatever row happens to come first (including a missing one)
   and would duplicate the missing rows when they are appended below. */
data have_f;
set have_s;
by id ;
if first.id then count=1;
else count=0;
run;

/* Append the rows with a missing datediff; COUNT stays missing for them
   because HAVE has no COUNT variable. */
data want;
set have_f have(where=(datediff=.));
run;

/* Regroup by ID. With PROC SORT's default EQUALS behavior the relative
   order within an ID is kept, so the missing rows follow the counted rows. */
proc sort data=want;
by id ;
run;

Thanks,
Suryakiran

sms1891 · Posted 12-03-2018 05:02 PM

I think I may have found an easier way...not sure if this gives the right results

/* Order each ID by datediff. SAS sorts missing values before any number,
   so rows with a missing datediff come first within an ID. */
proc sort data=have;
by id datediff;
run;

/* COUNT = 1 for the first non-missing datediff of each ID (its minimum),
   0 for every later non-missing row, and missing when datediff is missing.
   FIRST.ID alone cannot be used to find the minimum, because the first row
   of an ID is a missing datediff whenever that ID has one. */
data have_f;
set have;
by id ;
retain _found;
drop _found;
if first.id then _found=0;          /* new ID: minimum not flagged yet */
if missing(datediff) then count=.;
else if not _found then do;
  count=1;
  _found=1;                         /* a tied minimum is counted only once */
end;
else count=0;
run;

novinosrin · Posted 12-03-2018 04:44 PM

/* Sample data. Only ID and datediff are read; the third value on each data
   line (the expected Count) is not read by the INPUT statement. */
data have;
input ID datediff;
datalines;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

/* Two passes over each ID group within a single DATA step iteration.
   The helper variables start with "_" so DROP _: removes them, and they
   are reset to missing at the start of every iteration (i.e. every ID). */
data want;
  /* Pass 1: find the smallest non-missing datediff of the ID. */
  do until(last.id);
    set have;
    by id;
    if datediff > . then _lowest = min(_lowest, datediff);
  end;

  /* Pass 2: re-read the same group and write every row with its COUNT. */
  do until(last.id);
    set have;
    by id;
    /* default: 0 for a present datediff, missing for a missing one */
    if datediff ne . then count = 0;
    else count = .;
    /* only the first row that equals the minimum gets a 1 */
    if datediff > . and datediff = _lowest and _taken ne 1 then do;
      count = 1;
      _taken = 1;
    end;
    output;
  end;
  drop _:;
run;

Astounding · Posted 12-03-2018 04:50 PM

This ought to work, but it's trickier than it looks (especially if you want to keep all of the observations from your original data set, in their original order):

/* One row per ID holding that ID's smallest non-missing datediff.
   The BY statement requires HAVE to be sorted (or grouped) by ID. */
proc summary data=have;
by id;
var datediff;
output out=minvals (keep=id datediff) min=;
run;

/* Interleave the per-ID minimum with the detail rows. MINVALS is listed
   first on the SET statement, so its row is the first row of each ID group.
   The detail rows keep their original order. */
data want;
set minvals have;
by id;
retain captured_min;
count = 0;
if first.id then do;
   /* summary row: remember the minimum, then discard the row itself */
   captured_min = datediff;
   delete;
end;
else do;
   if datediff=. then count=.;
   else if datediff = captured_min then do;
      count=1;
      /* clear it so a repeated minimum in the same ID is counted only once */
      captured_min = .;
   end;
end;   /* closes the ELSE DO block */
drop captured_min;
run;

novinosrin · Posted 12-03-2018 05:13 PM

/* Sample data. Only ID and datediff are read; the third value on each data
   line (the expected Count) is not read by the INPUT statement. */
data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

/* Order each ID by datediff. Missing values sort before any number, so
   the first non-missing row of an ID holds its minimum. */
proc sort data=have out=_have;
by id  datediff;
run;

/* COUNT = missing for a missing datediff, 1 for the first non-missing row
   of the ID, 0 for every later non-missing row. */
data want;
set _have;
by id;
retain f;                 /* f = 1 once the ID's minimum has been flagged */
drop f;
if first.id then f=.;
if missing(datediff) then count=.;
else if not f then do;
  count=1;
  f=1;
end;
else count=0;
run;

novinosrin · Posted 12-03-2018 05:52 PM


/* Sample data. Only ID and datediff are read; the third value on each data
   line (the expected Count) is not read by the INPUT statement. */
data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

/* t = 1 when the row's datediff equals the ID's minimum (the summary
   function is remerged with the detail rows). COUNT takes t for rows with
   a datediff and is missing for every row whose datediff is missing. */
proc sql;
create table temp(drop=t)  as
select id ,datediff,
       (min(datediff)=datediff) as t,
       ifn(missing(datediff), ., calculated t) as count
from have
group by id
order by id,datediff;
quit;

/* Each DATA step iteration handles one ID/datediff group. When a value is
   repeated, only its first row keeps COUNT; later rows are set to 0.
   Repeated missing rows are left missing. */
data want;
do _n_=1 by 1 until(last.datediff);
  set temp;
  by id datediff;
  if _n_>1 and not missing(datediff) then count=0;
  output;
end;
run;

novinosrin · Posted 12-03-2018 06:53 PM


/* Sample data. Only ID and datediff are read; the third value on each data
   line (the expected Count) is not read by the INPUT statement. */
data have;
input ID    datediff ;
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

/* Load every row into an ordered multidata hash keyed by (ID, datediff).
   After each ID group, mark one item holding the group minimum with
   count=1. The hash is written to WANT once the last row has been read. */
data _null_;
if _n_=1 then do;
 dcl hash H (ordered: "A",multidata:'y') ;
   h.definekey  ("ID","datediff") ;
   h.definedata ("ID","datediff","count") ;
   h.definedone () ;
end;
do until(last.id);
  set have end=lr;
  by id;
  /* set COUNT on every row: inside this loop it is not reset between
     rows, so a missing row must not inherit the 0 of an earlier row */
  if missing(datediff) then count=.;
  else count=0;
  rc=h.add();
  if not missing(datediff) then _k=min(_k,datediff);
end;
/* _k is reset to missing at the top of each iteration, so it is the
   minimum of the current ID only; skip IDs with no datediff at all */
if _k>. then do;
  rc=h.find(key:id,key:_k);   /* positions on the first item with the minimum */
  count=1;
  rc=h.replacedup();          /* updates only that item, so ties stay 0 */
end;
/* written outside the _k check so WANT is still created when the last ID
   has only missing datediff values */
if lr then h.output(dataset:'want');
run;

How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

Re: How to create a new count variable based off min value of another variable by ID

SAS Innovate 2025: Save the Date

SAS Training: Just a Click Away