BookmarkSubscribeRSS Feed
sms1891
Quartz | Level 8

Dear all,

I have a data set with date difference values (datediff) by ID variable. Some IDs have no datediff and for rest of them with values for datediff, I want to create a new categorical/ count variable (COUNT) picking up the min value of datediff (as 1) and rest of the values as 0. For those with missing datefdiff, I want the new variable (COUNT) as missing. If by any chance if an ID has two of the same minimum datediff value, I just want to count it only once. Therefore COUNT should be 0 for the second observed same minimum datediff value.

 

This is what I want:

ID    datediff    Count

1        0              1

1        3              0

1        4              0

1        .               .

2        0              1

2        0              0

2       .                 .

3         6             1

3         7             0

4         .              .

5       0             1

5       1              0

5       2              0

6       13             1

6        16           0

 

 

 

Thank you!

7 REPLIES 7
SuryaKiran
Meteorite | Level 14

Hi,

 

Simply sorting the data by descending and giving a flag to the first record. 

 

data have;
input ID datediff;
datalines;
1 0
1 3
1 4
1 .
2 0
2 0
2 .
3 6
3 7
4 .
5 0
5 10
5 20
6 13
6 16
;
run;

proc sort data=have(where=(datediff ^=.)) out=have_s;
by id datediff;
run;

data have_f;
set have;
by id ;
if first.id then count=1;
else count=0;
run;

data want;
set have_f have(where=(datediff=.));
run;

proc sort data=want;
by id ;
run;
Thanks,
Suryakiran
sms1891
Quartz | Level 8

I think I may have found an easier way...not sure if this gives the right results

proc sort data=have;
by id datediff;
run;

data have_f;
set have;
by id ;
if first.id and datediff ne . then count=1;
else count=0;
run;
novinosrin
Tourmaline | Level 20
data have;
input ID    datediff ;  
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;
data want;
do until(last.id);
set have;
by id;
if datediff>. then _k=min(datediff,_k);
end;
do until(last.id);
set have;
by id;
count=ifn(datediff ne .,0,datediff);
if datediff>. and _k=datediff and not _f then do;
_f=1;
count=1;
end;
output;
end;
drop _:;
run;
Astounding
PROC Star

This ought to work, but it's trickier than it looks (especially if you want to keep all of the observations from  your original data set, in their original order):

 

proc summary data=have;

   by id;

   var datediff;

   output out=minvals (keep=id datediff) min=;

run;

 

data want;

set minvals have;

by id;

retain captured_min;

count = 0;

if first.id then do;

   captured_min = datediff;

   delete;

end;

else do;

   if datediff=. then count=.;

   else if datediff = captured_min then do;

       count=1;

       captured_min = .;

   end;

end;

drop captured_min;

run;

 

 

novinosrin
Tourmaline | Level 20
data have;
input ID    datediff ;  
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

proc sort data=have out=_have;
by id  datediff;
run;
data want;
set _have;
by id;
retain f;
if not (first.id and last.id) then count=0;
if first.id then f=.;
if not missing(datediff)and not f then do;
count=1;
f=1;
drop f;
end;
run;
novinosrin
Tourmaline | Level 20

data have;
input ID    datediff ;  
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;

proc sql;
create table temp(drop=t)  as
select id ,datediff, (min(datediff)=datediff) as t,ifn(calculated t and datediff=., .,calculated t) as count
from have
group by id
order by id,datediff;
quit;


data want;
do _n_=1 by 1 until(last.datediff);
set temp;
by id datediff;
if _n_>1 then count=0;
output;
end;
run;

novinosrin
Tourmaline | Level 20

data have;
input ID    datediff ;  
cards;
1        0              1
1        3              0
1        4              0
1        .               .
2        0              1
2        0              0
2       .                 .
3         6             1
3         7             0
4         .              .
5       0             1
5       1              0
5       2              0
6       13             1
6        16           0
;
data _null_;
if _n_=1 then do;
 dcl hash H (ordered: "A",multidata:'y') ;
   h.definekey  ("ID","datediff") ;
   h.definedata ("ID","datediff","count") ;
   h.definedone () ;
end;
do until(last.id);
set have end=lr;
by id;
if datediff>. then count=0;
rc=h.add();
if not missing(datediff) then _k=min(_k,datediff);
end;
if _k>.;
h.find(key:id,key:_k);
count=1;
RC = H.replacedup() ;
if lr;
h.output(dataset:'want');
run;

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 7 replies
  • 989 views
  • 2 likes
  • 4 in conversation