Hi SAS Community,
I have ambulance dispatch data for road crashes. Each line represents an emergency call. I want to identify calls that are for the same road crash. The same road crash means: (1) the TIME is within 6 hours and (2) the latitude/longitude location coordinates are within 60 metres.
I know I need to sort for date and time and then maybe use a lead and lag function to look forwards and back records conditionally. Please help 😊
DATA HAVE
ID DATE TIME LAT LONG
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
DATA WANT
ID DATE TIME LAT LONG EVENT_ID
1 10MAY2018 06:15 -32.021797 115.909015 1
2 10MAY2018 08:27 -32.021528 115.909417 1
3 23JUN2018 08:27 -17.356261 123.758261 2
4 31AUG2018 02:21 18.474158 76.530357 3
5 31AUG2018 02:22 40.862430 -0.212537 4
6 10MAY2018 09:34 -32.021646 115.909197 1
The "look back" is done with the lag() function, the "look ahead" with a little trick played in the second set statement:
data have;
input ID DATE :date9. TIME :time5. LAT LONG;
format
date date9.
time time5.
;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
data int;
set have;
dt = dhms(date,0,0,time);
format dt datetime19.;
run;
proc sort data=int;
by dt;
run;
data want;
set int;
set
int (
firstobs=2
keep=lat long dt
rename=(lat=next_lat long=next_long dt=next_dt)
)
int (
obs=1
drop=_all_
)
;
prev_lat = lag(lat);
prev_long = lag(long);
prev_dt = lag(dt);
format
prev_dt next_dt datetime19.
;
run;
Here could give you a start.
data have;
input ID DATE : date9. TIME :time8. LAT LONG;
format date date9. time hhmm.;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
proc sql;
create table want as
select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
from have as a,have as b
where a.time+'06:00:00't>=b.time and a.id ne b.id and
calculated dist<=60
order by 1;
quit;
OK. Here could get you better.
data x;
input ID DATE : date9. TIME :time8. LAT LONG;
format date date9. time hhmm.;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
proc sql;
create table a as
select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
from x as a,x as b
where a.time+'06:00:00't>=b.time and a.id ne b.id and
calculated dist<=60
order by 1;
quit;
data temp;
set a;
node=id;output;
node=_id;output;
keep node;
run;
proc sql;
create table b as
select id as from
from x where id not in (select node from temp);
quit;
data have;
set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household rename=(node=id household= EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
Here get you better.
data x; input ID DATE : date9. TIME :time8. LAT LONG; format date date9. time hhmm.; cards; 1 10MAY2018 06:15 -32.021797 115.909015 2 10MAY2018 08:27 -32.021528 115.909417 3 23JUN2018 08:27 -17.356261 123.758261 4 31AUG2018 02:21 18.474158 76.530357 5 31AUG2018 02:22 40.862430 -0.212537 6 10MAY2018 09:34 -32.021646 115.909197 ; run; proc sql; create table a as select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist from x as a,x as b where a.time+'06:00:00't>=b.time and a.id ne b.id and calculated dist<=60 order by 1; quit; data temp; set a; node=id;output; node=_id;output; keep node; run; proc sql; create table b as select id as from from x where id not in (select node from temp); quit; data have; set a(keep=id _id rename=(id=from _id=to)) b; run; data full; set have end=last; if _n_ eq 1 then do; declare hash h(); h.definekey('node'); h.definedata('node'); h.definedone(); end; output; node=from; h.replace(); from=to; to=node; output; node=from; h.replace(); if last then h.output(dataset:'node'); drop node; run; data want(keep=node household rename=(node=id household= EVENT_ID)); declare hash ha(ordered:'a'); declare hiter hi('ha'); ha.definekey('count'); ha.definedata('last'); ha.definedone(); declare hash _ha(hashexp: 20); _ha.definekey('key'); _ha.definedone(); if 0 then set full; declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y'); from_to.definekey('from'); from_to.definedata('to'); from_to.definedone(); if 0 then set node; declare hash no(dataset:'node'); declare hiter hi_no('no'); no.definekey('node'); no.definedata('node'); no.definedone(); do while(hi_no.next()=0); household+1; output; count=1; key=node;_ha.add(); last=node;ha.add(); rc=hi.first(); do while(rc=0); from=last;rx=from_to.find(); do while(rx=0); key=to;ry=_ha.check(); if ry ne 0 then do; node=to;output;rr=no.remove(key:node); key=to;_ha.add(); count+1; last=to;ha.add(); end; rx=from_to.find_next(); end; rc=hi.next(); end; ha.clear();_ha.clear(); end; stop; run;
Hi KSHarp - WOW, your code is amazing! It worked perfectly! Thank you very much 🙂
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.