Re: Conditional LEAD and LAG

eceklic · Posted 10-15-2018 01:44 AM

Hi SAS Community,

I have ambulance dispatch data for road crashes. Each line represents an emergency call. I want to identify calls that are for the same road crash. The same road crash means: (1) the TIME is within 6 hours and (2) the latitude/longitude location coordinates are within 60 metres.

I know I need to sort for date and time and then maybe use a lead and lag function to look forwards and back records conditionally. Please help 😊

DATA HAVE

ID DATE TIME LAT LONG

1 10MAY2018 06:15 -32.021797 115.909015

2 10MAY2018 08:27 -32.021528 115.909417

3 23JUN2018 08:27 -17.356261 123.758261

4 31AUG2018 02:21 18.474158 76.530357

5 31AUG2018 02:22 40.862430 -0.212537

6 10MAY2018 09:34 -32.021646 115.909197

DATA WANT

ID DATE TIME LAT LONG EVENT_ID

1 10MAY2018 06:15 -32.021797 115.909015 1

2 10MAY2018 08:27 -32.021528 115.909417 1

3 23JUN2018 08:27 -17.356261 123.758261 2

4 31AUG2018 02:21 18.474158 76.530357 3

5 31AUG2018 02:22 40.862430 -0.212537 4

6 10MAY2018 09:34 -32.021646 115.909197 1

Kurt_Bremser · Posted 10-15-2018 07:10 AM

The "look back" is done with the lag() function, the "look ahead" with a little trick played in the second set statement:

data have;
input ID DATE :date9. TIME :time5. LAT LONG;
format
  date date9.
  time time5.
;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;

data int;
set have;
dt = dhms(date,0,0,time);
format dt datetime19.;
run;

proc sort data=int;
by dt;
run;

data want;
set int;
set
  int (
    firstobs=2
    keep=lat long dt
    rename=(lat=next_lat long=next_long dt=next_dt)
  )
  int (
    obs=1
    drop=_all_
  )
;
prev_lat = lag(lat);
prev_long = lag(long);
prev_dt = lag(dt);
format
  prev_dt next_dt datetime19.
;
run;

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

Ksharp · Posted 10-15-2018 08:48 AM

Here could give you a start.

data have;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table want as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from have as a,have as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;

Ksharp · Posted 10-15-2018 09:16 AM

OK. Here could get you better.

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;

Ksharp · Posted 10-15-2018 09:19 AM

Here get you better.

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;

eceklic · Posted 10-21-2018 07:17 PM

Hi KSHarp - WOW, your code is amazing! It worked perfectly! Thank you very much 🙂

Conditional LEAD and LAG