I have geocoded 2 datasets and am looking to determine how close items are from one dataset according to the second dataset. To do so I have the data looping through dataset A and comparing to the data in dataset B, then moving on to the next line and so on. For each loop I want to store the smallest three distances and store them. I am able to do this individually plugging in the lat/long, but when I start trying it with the loop, I don't get the correct data being stored. Here is the input data: LTC_ll:
Y X
33.60665052 -117.2356103
33.94426376 -117.584795
33.73560974 -116.1868479
33.91821479 -117.464618
33.76410049 -117.0097073
33.763601 -117.0123175
33.57726067 -117.2166387
33.81956554 -116.5271946
33.94678775 -117.5255383
pharm_ll:
Y X Provider_Pin
33.907012 -117.470026 MH1192820
33.70836746 -116.2338597 MH1229820
34.10705027 -117.6354387 MH1195216
33.91427037 -117.2916242 MH1257604
33.83029127 -117.235193 MH1207869
34.080484 -117.5785367 MH1241192
34.058434 -117.30392 MH1192780
33.59712084 -117.236783 MH1192855
34.070319 -117.226215 MH1195214
33.52529125 -117.1667247 MH1195215
33.85646951 -117.5559446 MH1192822
34.4247217 -117.3729584 MH1303889
34.099411 -117.626809 MH1192789
34.07225217 -117.4356723 MH1192694
34.03301977 -117.6110217 MH1192776
33.89542399 -117.1912384 MH1241051
33.74745614 -116.9907403 MH1303820
33.907012 -117.470026 MH1192741
33.627034 -117.166653 MH1272023
33.907012 -117.470026 MH1192821
33.787341 -116.379486 MH1203616
34.14095204 -117.2498498 MH1192758
33.93463154 -117.2437705 MH1192837
34.05951494 -117.2232483 MH1000216
33.837615 -116.546767 MH1259388
34.00011234 -117.720855 MH1269921
34.03301977 -117.6110217 MH1192743
34.50508314 -117.3265321 MH1192752
34.072652 -117.435669 MH1192695
33.976325 -117.330414 PI1032934
Here is the code I currently have: /*pulling locations lat/long, state and PIN to be able to go back and identify*/
proc sql;
create table lat_long as
select Y, X, state, Provider_Pin /*adjust data so it must state ID*/
from pharm_ll;
/*setting number of observations for pharm and the LTC, this initiates the loop*/
data _null_;
call symputx('nlls',obs);
stop;
set lat_long nobs=obs;
run;
/*Initiate the number of times the data will be looped*/
data _null_;
call symputx('xlls',obs);
stop;
set LTC_ll nobs=obs;
run;
%put &xlls;
proc sql;
drop table stored_dist;
quit;
proc sql;
drop table LTC_dist_measure;
quit;
/*setting macro to loop through all LTC facilities and then loop through pharm, using google to map the distance*/
%macro LTC_loop;
/*storing each iteration of the loop in ll1*/
%do h=1 %to &xlls;
data _null_;
hrec = &h;
set LTC_ll point=hrec;
call symputx('ll1', catx(',',y,x));
stop;
* create a macro that contains a loop to access Google Maps multiple time;
%macro distance_time;
/* clear the log on each iteration */
dm 'clear log';
* delete any data set named DISTANCE_TIME that might exist in the WORK library;
proc datasets lib=work nolist;
delete distance_time;
quit;
/*storing pharm geocode in variable ll2 */
%do j=1 %to &nlls;
data _null_;
nrec = &j;
set lat_long point=nrec;
call symputx('ll2', catx(',',y,x));
stop;
run;
* lat/long of centroid of zip 12203 hard-coded as part of the URL;
filename x url "https: // www .google. com/maps/dir/&ll1/&ll2/?force=lite"; \*editing for SAS communities*\
filename z temp;
* same technique used in the example with a pair of lat/long coodinates;
data _null_;
infile x recfm=f lrecl=1 end=eof;
file z recfm=f lrecl=1;
input @1 x $char1.;
put @1 x $char1.;
if eof;
call symputx('filesize',_n_);
run;
* drive time as a numeric variable;
data temp;
infile z recfm=f lrecl=&filesize. eof=done;
input @ 'miles' +(-15) @ '"' distance :comma12. text $30.;
units = scan(text,1,'"');
text = scan(text,3,'"');
* convert times to seconds;
select;
* combine days and hours;
when (find(text,'d') ne 0) time = sum(86400*input(scan(text,1,' '),best.),
3600*input(scan(text,3,' '),best.));
* combine hours and minutes;
when (find(text,'h') ne 0) time = sum(3600*input(scan(text,1,' '),best.),
60*input(scan(text,3,' '),best.));
* just minutes;
otherwise time = 60*input(scan(text,1,' '),best.);
end;
output;
keep distance time;
stop;
done:
output;
run;
filename x clear;
filename z clear;
* add an observation to the data set DISTANCE_TIME;
proc append base=distance_time data=temp;
run;
proc sql;
drop table temp;
quit;
%end;
%mend;
* use the macro;
%distance_time;
*
add variables from original data set to new data set distance_time
use geodist function to calculate straight line distance
;
data distance_time;
set distance_time;
set lat_long point=_n_;
straight_line = round(geodist(&ll1,y,x,'DM'), 0.01);
run;
/*creating variables for the top 3 closest*/
proc sort data=distance_time;
by distance;
run;
data top3;
set distance_time;
keep time distance Provider_Pin;
format time time6.;
if _N_ <=3 then output; /*change observations here if desired more*/
run;
/*formats data for top ID and distance*/
proc summary data=top3 nway;
output out=pharm_form (drop=_:)
idgroup(out[3] (Provider_Pin distance)=)/autoname;
run;
/* proc print data=pharm_form;*/
/* run;*/
%put &ll1;
/*testing here*/
proc append base=stored_dist data=pharm_form;
run;
*Clean up tables for space maintenance and prevent errors;
proc sql;
drop table pharm_form;
quit;
/*end test*/
%end;
run;
%mend;
%LTC_loop;
data LTC_dist_measure;
set LTC_ll;
set stored_dist point=_n_;
run;
... View more