I divided the date range into bins and required that the best date (closest to the center of the bin) be Inside the bin. It should work for any number of bins > 1.
data have;
input ID DateStr $ Result $;
/* Grrrr! */
date = input(cats(substr(dateStr,4,2),substr(dateStr,1,3),substr(dateStr,7)),date7.);
format date yymmdd10.;
drop dateStr;
datalines;
001 Jan01/10 Pos
001 Mar28/12 Pos
001 Jan30/13 Neg
001 May04/12 Pos
001 Jun20/10 Neg
002 Jan01/10 Neg
002 Mar28/12 Neg
002 Feb28/13 Neg
002 May04/12 Neg
002 Jun20/10 Neg
003 Oct01/14 Neg
003 Mar28/12 Pos
003 Jan30/14 Neg
004 May04/12 Pos
005 Jun29/16 Neg
006 Jan01/10 Pos
006 Mar28/12 Pos
;
%let nPoints=3;
proc means data=have noprint;
by id;
var date;
output out=ranges min= max= range= / autoname;
run;
data bins;
set ranges;
if date_range = 0 then do;
binStart = date_min;
binEnd = date_min;
binMid = date_min;
output;
end;
else do;
width = date_range / (&nPoints-1);
do binStart = date_min - 0.5*width to date_max - 0.5*width by width;
binEnd = binStart + width;
binMid = ( binStart + binEnd ) / 2;
output;
end;
end;
format bin: yymmdd10.;
keep id bin: ;
run;
proc sql;
create table cases as
select unique
b.id,
round(b.binMid) as midDate format=yymmdd10.,
a.date,
a.result
from have as a right join
bins as b on a.id=b.id and a.date between b.binStart and b.binEnd
group by b.id, b.binMid
having abs(a.date-b.binMid) = min(abs(a.date-b.binMid));
select * from cases;
quit;
ID midDate date Result
------------------------------------------
1 2010-01-01 2010-01-01 Pos
1 2011-07-18 2012-03-28 Pos
1 2013-01-30 2013-01-30 Neg
2 2010-01-01 2010-01-01 Neg
2 2011-08-01 2012-03-28 Neg
2 2013-02-28 2013-02-28 Neg
3 2012-03-28 2012-03-28 Pos
3 2013-06-30 2014-01-30 Neg
3 2014-10-01 2014-10-01 Neg
4 2012-05-04 2012-05-04 Pos
5 2016-06-29 2016-06-29 Neg
6 2010-01-01 2010-01-01 Pos
6 2011-02-14 .
6 2012-03-28 2012-03-28 Pos
... View more