Dear all,
I have a data set with date difference values (datediff) by ID variable. Some IDs have no datediff and for rest of them with values for datediff, I want to create a new categorical/ count variable (COUNT) picking up the min value of datediff (as 1) and rest of the values as 0. For those with missing datefdiff, I want the new variable (COUNT) as missing. If by any chance if an ID has two of the same minimum datediff value, I just want to count it only once. Therefore COUNT should be 0 for the second observed same minimum datediff value.
This is what I want:
ID datediff Count
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
Thank you!
Hi,
Simply sorting the data by descending and giving a flag to the first record.
data have;
input ID datediff;
datalines;
1 0
1 3
1 4
1 .
2 0
2 0
2 .
3 6
3 7
4 .
5 0
5 10
5 20
6 13
6 16
;
run;
proc sort data=have(where=(datediff ^=.)) out=have_s;
by id datediff;
run;
data have_f;
set have;
by id ;
if first.id then count=1;
else count=0;
run;
data want;
set have_f have(where=(datediff=.));
run;
proc sort data=want;
by id ;
run;
I think I may have found an easier way...not sure if this gives the right results
proc sort data=have;
by id datediff;
run;
data have_f;
set have;
by id ;
if first.id and datediff ne . then count=1;
else count=0;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
data want;
do until(last.id);
set have;
by id;
if datediff>. then _k=min(datediff,_k);
end;
do until(last.id);
set have;
by id;
count=ifn(datediff ne .,0,datediff);
if datediff>. and _k=datediff and not _f then do;
_f=1;
count=1;
end;
output;
end;
drop _:;
run;
This ought to work, but it's trickier than it looks (especially if you want to keep all of the observations from your original data set, in their original order):
proc summary data=have;
by id;
var datediff;
output out=minvals (keep=id datediff) min=;
run;
data want;
set minvals have;
by id;
retain captured_min;
count = 0;
if first.id then do;
captured_min = datediff;
delete;
end;
else do;
if datediff=. then count=.;
else if datediff = captured_min then do;
count=1;
captured_min = .;
end;
end;
drop captured_min;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
proc sort data=have out=_have;
by id datediff;
run;
data want;
set _have;
by id;
retain f;
if not (first.id and last.id) then count=0;
if first.id then f=.;
if not missing(datediff)and not f then do;
count=1;
f=1;
drop f;
end;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
proc sql;
create table temp(drop=t) as
select id ,datediff, (min(datediff)=datediff) as t,ifn(calculated t and datediff=., .,calculated t) as count
from have
group by id
order by id,datediff;
quit;
data want;
do _n_=1 by 1 until(last.datediff);
set temp;
by id datediff;
if _n_>1 then count=0;
output;
end;
run;
data have;
input ID datediff ;
cards;
1 0 1
1 3 0
1 4 0
1 . .
2 0 1
2 0 0
2 . .
3 6 1
3 7 0
4 . .
5 0 1
5 1 0
5 2 0
6 13 1
6 16 0
;
data _null_;
if _n_=1 then do;
dcl hash H (ordered: "A",multidata:'y') ;
h.definekey ("ID","datediff") ;
h.definedata ("ID","datediff","count") ;
h.definedone () ;
end;
do until(last.id);
set have end=lr;
by id;
if datediff>. then count=0;
rc=h.add();
if not missing(datediff) then _k=min(_k,datediff);
end;
if _k>.;
h.find(key:id,key:_k);
count=1;
RC = H.replacedup() ;
if lr;
h.output(dataset:'want');
run;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.