Dear all,
I have a data set with date difference values (datediff) by ID variable. Some IDs have no datediff and for rest of them with values for datediff, I want to create a new categorical/ count variable (COUNT) picking up the min value of datediff (as 1) and rest of the values as 0. For those with missing datefdiff, I want the new variable (COUNT) as missing. If by any chance if an ID has two of the same minimum datediff value, I just want to count it only once. Therefore COUNT should be 0 for the second observed same minimum datediff value.
This is what I want:
ID datediff Count
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
Thank you!
Hi,
Simply sorting the data by descending and giving a flag to the first record.
data have;
input ID datediff;
datalines;
1 0
1 3
1 4
1 .
2 0
2 0
2 .
3 6
3 7
4 .
5 0
5 10
5 20
6 13
6 16
;
run;
proc sort data=have(where=(datediff ^=.)) out=have_s;
by id datediff;
run;
data have_f;
set have;
by id ;
if first.id then count=1;
else count=0;
run;
data want;
set have_f have(where=(datediff=.));
run;
proc sort data=want;
by id ;
run;
I think I may have found an easier way...not sure if this gives the right results
proc sort data=have;
by id datediff;
run;
data have_f;
set have;
by id ;
if first.id and datediff ne . then count=1;
else count=0;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
data want;
do until(last.id);
set have;
by id;
if datediff>. then _k=min(datediff,_k);
end;
do until(last.id);
set have;
by id;
count=ifn(datediff ne .,0,datediff);
if datediff>. and _k=datediff and not _f then do;
_f=1;
count=1;
end;
output;
end;
drop _:;
run;
This ought to work, but it's trickier than it looks (especially if you want to keep all of the observations from your original data set, in their original order):
proc summary data=have;
by id;
var datediff;
output out=minvals (keep=id datediff) min=;
run;
data want;
set minvals have;
by id;
retain captured_min;
count = 0;
if first.id then do;
captured_min = datediff;
delete;
end;
else do;
if datediff=. then count=.;
else if datediff = captured_min then do;
count=1;
captured_min = .;
end;
end;
drop captured_min;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
proc sort data=have out=_have;
by id datediff;
run;
data want;
set _have;
by id;
retain f;
if not (first.id and last.id) then count=0;
if first.id then f=.;
if not missing(datediff)and not f then do;
count=1;
f=1;
drop f;
end;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
proc sql;
create table temp(drop=t) as
select id ,datediff, (min(datediff)=datediff) as t,ifn(calculated t and datediff=., .,calculated t) as count
from have
group by id
order by id,datediff;
quit;
data want;
do _n_=1 by 1 until(last.datediff);
set temp;
by id datediff;
if _n_>1 then count=0;
output;
end;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
data _null_;
if _n_=1 then do;
dcl hash H (ordered: "A",multidata:'y') ;
h.definekey ("ID","datediff") ;
h.definedata ("ID","datediff","count") ;
h.definedone () ;
end;
do until(last.id);
set have end=lr;
by id;
if datediff>. then count=0;
rc=h.add();
if not missing(datediff) then _k=min(_k,datediff);
end;
if _k>.;
h.find(key:id,key:_k);
count=1;
RC = H.replacedup() ;
if lr;
h.output(dataset:'want');
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.