BookmarkSubscribeRSS Feed
eceklic
Calcite | Level 5

Hi SAS Community,

 

I have ambulance dispatch data for road crashes. Each line represents an emergency call. I want to identify calls that are for the same road crash. The same road crash means: (1) the TIME is within 6 hours and (2) the latitude/longitude location coordinates are within 60 metres.

 

I know I need to sort for date and time and then maybe use a lead and lag function to look forwards and back records conditionally. Please help 😊

 

DATA HAVE

ID            DATE                     TIME                      LAT                         LONG

1              10MAY2018        06:15                     -32.021797         115.909015      

2              10MAY2018        08:27                     -32.021528         115.909417

3              23JUN2018         08:27                   -17.356261           123.758261

4              31AUG2018        02:21                     18.474158          76.530357

5              31AUG2018        02:22                     40.862430          -0.212537

6             10MAY2018         09:34                     -32.021646         115.909197

 

 

DATA WANT

ID            DATE                     TIME                      LAT                         LONG                       EVENT_ID

1              10MAY2018        06:15                     -32.021797          115.909015                      1

2              10MAY2018        08:27                     -32.021528          115.909417                      1

3              23JUN2018         08:27                     -17.356261          123.758261                      2

4              31AUG2018        02:21                     18.474158           76.530357                        3

5              31AUG2018        02:22                     40.862430           -0.212537                         4

6             10MAY2018         09:34                     -32.021646          115.909197                      1

5 REPLIES 5
Kurt_Bremser
Super User

The "look back" is done with the lag() function, the "look ahead" with a little trick played in the second set statement:

data have;
input ID DATE :date9. TIME :time5. LAT LONG;
format
  date date9.
  time time5.
;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;

data int;
set have;
dt = dhms(date,0,0,time);
format dt datetime19.;
run;

proc sort data=int;
by dt;
run;

data want;
set int;
set
  int (
    firstobs=2
    keep=lat long dt
    rename=(lat=next_lat long=next_long dt=next_dt)
  )
  int (
    obs=1
    drop=_all_
  )
;
prev_lat = lag(lat);
prev_long = lag(long);
prev_dt = lag(dt);
format
  prev_dt next_dt datetime19.
;
run;
Ksharp
Super User

Here could give you a start.

 

data have;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table want as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from have as a,have as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
Ksharp
Super User

OK. Here could get you better.

 

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
Ksharp
Super User

Here get you better.

 

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
eceklic
Calcite | Level 5

Hi KSHarp - WOW, your code is amazing! It worked perfectly! Thank you very much 🙂