Hi SAS Community,
I have ambulance dispatch data for road crashes. Each line represents an emergency call. I want to identify calls that are for the same road crash. The same road crash means: (1) the TIME is within 6 hours and (2) the latitude/longitude location coordinates are within 60 metres.
I know I need to sort for date and time and then maybe use a lead and lag function to look forwards and back records conditionally. Please help 😊
DATA HAVE
ID DATE TIME LAT LONG
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
DATA WANT
ID DATE TIME LAT LONG EVENT_ID
1 10MAY2018 06:15 -32.021797 115.909015 1
2 10MAY2018 08:27 -32.021528 115.909417 1
3 23JUN2018 08:27 -17.356261 123.758261 2
4 31AUG2018 02:21 18.474158 76.530357 3
5 31AUG2018 02:22 40.862430 -0.212537 4
6 10MAY2018 09:34 -32.021646 115.909197 1
The "look back" is done with the lag() function, the "look ahead" with a little trick played in the second set statement:
data have;
input ID DATE :date9. TIME :time5. LAT LONG;
format
date date9.
time time5.
;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
data int;
set have;
dt = dhms(date,0,0,time);
format dt datetime19.;
run;
proc sort data=int;
by dt;
run;
data want;
set int;
set
int (
firstobs=2
keep=lat long dt
rename=(lat=next_lat long=next_long dt=next_dt)
)
int (
obs=1
drop=_all_
)
;
prev_lat = lag(lat);
prev_long = lag(long);
prev_dt = lag(dt);
format
prev_dt next_dt datetime19.
;
run;
Here could give you a start.
data have;
input ID DATE : date9. TIME :time8. LAT LONG;
format date date9. time hhmm.;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
proc sql;
create table want as
select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
from have as a,have as b
where a.time+'06:00:00't>=b.time and a.id ne b.id and
calculated dist<=60
order by 1;
quit;
OK. Here could get you better.
data x;
input ID DATE : date9. TIME :time8. LAT LONG;
format date date9. time hhmm.;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
proc sql;
create table a as
select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
from x as a,x as b
where a.time+'06:00:00't>=b.time and a.id ne b.id and
calculated dist<=60
order by 1;
quit;
data temp;
set a;
node=id;output;
node=_id;output;
keep node;
run;
proc sql;
create table b as
select id as from
from x where id not in (select node from temp);
quit;
data have;
set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household rename=(node=id household= EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
Here get you better.
data x;
input ID DATE : date9. TIME :time8. LAT LONG;
format date date9. time hhmm.;
cards;
1 10MAY2018 06:15 -32.021797 115.909015
2 10MAY2018 08:27 -32.021528 115.909417
3 23JUN2018 08:27 -17.356261 123.758261
4 31AUG2018 02:21 18.474158 76.530357
5 31AUG2018 02:22 40.862430 -0.212537
6 10MAY2018 09:34 -32.021646 115.909197
;
run;
proc sql;
create table a as
select a.*,b.id as _id , geodist( a.LAT , a.LONG,b.LAT , b.LONG,'k')*1000 as dist
from x as a,x as b
where a.time+'06:00:00't>=b.time and a.id ne b.id and
calculated dist<=60
order by 1;
quit;
data temp;
set a;
node=id;output;
node=_id;output;
keep node;
run;
proc sql;
create table b as
select id as from
from x where id not in (select node from temp);
quit;
data have;
set a(keep=id _id rename=(id=from _id=to)) b;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household rename=(node=id household= EVENT_ID));
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
Hi KSHarp - WOW, your code is amazing! It worked perfectly! Thank you very much 🙂
April 27 – 30 | Gaylord Texan | Grapevine, Texas
Walk in ready to learn. Walk out ready to deliver. This is the data and AI conference you can't afford to miss.
Register now and lock in 2025 pricing—just $495!
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.