BookmarkSubscribeRSS Feed
eceklic
Calcite | Level 5

Hi SAS Community,

 

I have ambulance dispatch data for road crashes. Each line represents an emergency call. I want to identify calls that are for the same road crash. The same road crash means: (1) the TIME is within 6 hours and (2) the latitude/longitude location coordinates are within 60 metres.

 

I know I need to sort for date and time and then maybe use a lead and lag function to look forwards and back records conditionally. Please help 😊

 

DATA HAVE

ID            DATE                     TIME                      LAT                         LONG

1              10MAY2018        06:15                     -32.021797         115.909015      

2              10MAY2018        08:27                     -32.021528         115.909417

3              23JUN2018         08:27                   -17.356261           123.758261

4              31AUG2018        02:21                     18.474158          76.530357

5              31AUG2018        02:22                     40.862430          -0.212537

6             10MAY2018         09:34                     -32.021646         115.909197

 

 

DATA WANT

ID            DATE                     TIME                      LAT                         LONG                       EVENT_ID

1              10MAY2018        06:15                     -32.021797          115.909015                      1

2              10MAY2018        08:27                     -32.021528          115.909417                      1

3              23JUN2018         08:27                     -17.356261          123.758261                      2

4              31AUG2018        02:21                     18.474158           76.530357                        3

5              31AUG2018        02:22                     40.862430           -0.212537                         4

6             10MAY2018         09:34                     -32.021646          115.909197                      1

5 REPLIES 5
Kurt_Bremser
Super User

The "look back" is done with the lag() function, the "look ahead" with a little trick played in the second set statement:

data have;
input ID DATE :date9. TIME :time5. LAT LONG;
format
  date date9.
  time time5.
;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;

data int;
set have;
dt = dhms(date,0,0,time);
format dt datetime19.;
run;

proc sort data=int;
by dt;
run;

data want;
set int;
set
  int (
    firstobs=2
    keep=lat long dt
    rename=(lat=next_lat long=next_long dt=next_dt)
  )
  int (
    obs=1
    drop=_all_
  )
;
prev_lat = lag(lat);
prev_long = lag(long);
prev_dt = lag(dt);
format
  prev_dt next_dt datetime19.
;
run;
Ksharp
Super User

Here could give you a start.

 

data have;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table want as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from have as a,have as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
Ksharp
Super User

OK. Here could get you better.

 

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
Ksharp
Super User

Here get you better.

 

data x;
input ID            DATE    : date9.                 TIME      :time8.               LAT                         LONG;
format date date9. time hhmm.;
cards;
1              10MAY2018        06:15                     -32.021797         115.909015      
2              10MAY2018        08:27                     -32.021528         115.909417
3              23JUN2018         08:27                   -17.356261           123.758261
4              31AUG2018        02:21                     18.474158          76.530357
5              31AUG2018        02:22                     40.862430          -0.212537
6             10MAY2018         09:34                     -32.021646         115.909197
;
run;
proc sql;
create table a as
select 	a.*,b.id as _id ,	geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
 from x as a,x as b
  where a.time+'06:00:00't>=b.time and a.id ne b.id	and 
  calculated dist<=60
   order by 1;
quit;
data  temp;
 set a;
 node=id;output;
 node=_id;output;
 keep node;
run;
proc sql;
create table b as
select id as from
 from x where id not in (select node from temp);
quit;


data have;
 set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household rename=(node=id household=  EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
eceklic
Calcite | Level 5

Hi KSHarp - WOW, your code is amazing! It worked perfectly! Thank you very much 🙂

SAS Innovate 2025: Save the Date

 SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!

Save the date!

Mastering the WHERE Clause in PROC SQL

SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 5 replies
  • 1217 views
  • 0 likes
  • 3 in conversation