BookmarkSubscribeRSS Feed
dmlouis
Calcite | Level 5

I have geocoded 2 datasets and am looking to determine how close items are from one dataset according to the second dataset. To do so I have the data looping through dataset A and comparing to the data in dataset B, then moving on to the next line and so on. For each loop I want to store the smallest three distances and store them. I am able to do this individually plugging in the lat/long, but when I start trying it with the loop, I don't get the correct data being stored. 

Here is the input data:

LTC_ll:
Y	X
33.60665052	-117.2356103
33.94426376	-117.584795
33.73560974	-116.1868479
33.91821479	-117.464618
33.76410049	-117.0097073
33.763601	-117.0123175
33.57726067	-117.2166387
33.81956554	-116.5271946
33.94678775	-117.5255383

pharm_ll:
Y	X	Provider_Pin
33.907012	-117.470026	MH1192820
33.70836746	-116.2338597	MH1229820
34.10705027	-117.6354387	MH1195216
33.91427037	-117.2916242	MH1257604
33.83029127	-117.235193	MH1207869
34.080484	-117.5785367	MH1241192
34.058434	-117.30392	MH1192780
33.59712084	-117.236783	MH1192855
34.070319	-117.226215	MH1195214
33.52529125	-117.1667247	MH1195215
33.85646951	-117.5559446	MH1192822
34.4247217	-117.3729584	MH1303889
34.099411	-117.626809	MH1192789
34.07225217	-117.4356723	MH1192694
34.03301977	-117.6110217	MH1192776
33.89542399	-117.1912384	MH1241051
33.74745614	-116.9907403	MH1303820
33.907012	-117.470026	MH1192741
33.627034	-117.166653	MH1272023
33.907012	-117.470026	MH1192821
33.787341	-116.379486	MH1203616
34.14095204	-117.2498498	MH1192758
33.93463154	-117.2437705	MH1192837
34.05951494	-117.2232483	MH1000216
33.837615	-116.546767	MH1259388
34.00011234	-117.720855	MH1269921
34.03301977	-117.6110217	MH1192743
34.50508314	-117.3265321	MH1192752
34.072652	-117.435669	MH1192695
33.976325	-117.330414	PI1032934

 

Here is the code I currently have:

/*pulling locations lat/long, state and PIN to be able to go back and identify*/
proc sql;
	create table lat_long as
	select Y, X, state, Provider_Pin /*adjust data so it must state ID*/
	from pharm_ll;

/*setting number of observations for pharm and the LTC, this initiates the loop*/
data _null_;
call symputx('nlls',obs);
stop;
set lat_long nobs=obs;
run;
/*Initiate the number of times the data will be looped*/
data _null_;
call symputx('xlls',obs);
stop;
set LTC_ll nobs=obs;
run;

%put &xlls;

proc sql;
drop table stored_dist;
quit;
proc sql;
drop table LTC_dist_measure;
quit;

/*setting macro to loop through all LTC facilities and then loop through pharm, using google to map the distance*/
%macro LTC_loop;
/*storing each iteration of the loop in ll1*/
%do h=1 %to &xlls;
data _null_;
	hrec = &h;
	set LTC_ll point=hrec;
	call symputx('ll1', catx(',',y,x));
	stop;
	
* create a macro that contains a loop to access Google Maps multiple time;
	%macro distance_time;

/* clear the log on each iteration */
	dm 'clear log';

* delete any data set named DISTANCE_TIME that might exist in the WORK library;
	proc datasets lib=work nolist;
	delete distance_time;
	quit;
/*storing pharm geocode in variable ll2 */
	%do j=1 %to &nlls;
	data _null_;
	nrec = &j;
	set lat_long point=nrec;
	call symputx('ll2', catx(',',y,x));
	stop;
	run;

* lat/long of centroid of zip 12203 hard-coded as part of the URL;
	filename x url "https: // www .google. com/maps/dir/&ll1/&ll2/?force=lite"; \*editing for SAS communities*\
	filename z temp;
 
* same technique used in the example with a pair of lat/long coodinates;
	data _null_; 
	infile x recfm=f lrecl=1 end=eof; 
	file z recfm=f lrecl=1;
	input @1 x $char1.; 
	put @1 x $char1.;
	if eof;
	call symputx('filesize',_n_);
	run;
 
* drive time as a numeric variable;
	data temp;
	infile z recfm=f lrecl=&filesize. eof=done;
	input @ 'miles' +(-15) @ '"' distance :comma12. text $30.;
	units    = scan(text,1,'"');
	text     = scan(text,3,'"');
* convert times to seconds;
  	select;
* combine days and hours;
   	when (find(text,'d') ne 0) time = sum(86400*input(scan(text,1,' '),best.), 
                                        3600*input(scan(text,3,' '),best.));
* combine hours and minutes;
   	when (find(text,'h') ne 0) time = sum(3600*input(scan(text,1,' '),best.), 
                                        60*input(scan(text,3,' '),best.));
* just minutes;
   	otherwise                  time = 60*input(scan(text,1,' '),best.);
  	end;
	output; 
	keep distance time;
	stop;
	done:
	output;
	run;

	filename x clear;
	filename z clear;
 
* add an observation to the data set DISTANCE_TIME;
	proc append base=distance_time data=temp;
	run;
	proc sql;
	drop table temp;
	quit;
	%end;
	%mend;
 
* use the macro;
	%distance_time;
 
*
add variables from original data set to new data set distance_time
use geodist function to calculate straight line distance
;
	data distance_time;
	set distance_time;
	set lat_long point=_n_;
	straight_line = round(geodist(&ll1,y,x,'DM'), 0.01);
	run;

/*creating variables for the top 3 closest*/
	proc sort data=distance_time;
		by distance;
		run;
	data top3;
		set distance_time;
		keep time distance Provider_Pin;
		format time time6.;
		if _N_ <=3 then output; /*change observations here if desired more*/
	run;

/*formats data for top ID and distance*/
	proc summary data=top3 nway;
	output out=pharm_form (drop=_:)
		idgroup(out[3] (Provider_Pin distance)=)/autoname;
	run;

/*	proc print data=pharm_form;*/
/*	run;*/
		%put &ll1;

/*testing here*/
proc append base=stored_dist data=pharm_form;
run;
*Clean up tables for space maintenance and prevent errors;
proc sql;
drop table pharm_form;
quit;
/*end test*/
    	%end;
	run;
%mend;

%LTC_loop;

data LTC_dist_measure;
set LTC_ll;
set stored_dist point=_n_;
run;

 

1 REPLY 1
Patrick
Opal | Level 21

If it's just the geo-distance you need then use SAS function geodistance(). That's how below working code does it.

 

If you need the information from Google and that's a link we can't just use as well for testing then please provide your working code for a single call. This will make it much easier for us to then propose the "generalized" version without being able to test it.

Ideally the working code you provide is as simple as possible and just contains the part you need help with.

 

data have_1;
  infile datalines truncover dlm=' ';
  input (lat_1 long_1) (:best32.);
  datalines;
33.60665052 -117.2356103
33.94426376 -117.584795
33.73560974 -116.1868479
33.91821479 -117.464618
33.76410049 -117.0097073
33.763601 -117.0123175
33.57726067 -117.2166387
33.81956554 -116.5271946
33.94678775 -117.5255383
;

data have_2;
  infile datalines truncover dlm=' ';
  input (lat_2 long_2) (:best32.) provider_pin :$20.;
  datalines;
33.907012 -117.470026 MH1192820
33.70836746 -116.2338597 MH1229820
34.10705027 -117.6354387 MH1195216
33.91427037 -117.2916242 MH1257604
33.83029127 -117.235193 MH1207869
34.080484 -117.5785367 MH1241192
34.058434 -117.30392 MH1192780
33.59712084 -117.236783 MH1192855
34.070319 -117.226215 MH1195214
33.52529125 -117.1667247 MH1195215
33.85646951 -117.5559446 MH1192822
34.4247217 -117.3729584 MH1303889
34.099411 -117.626809 MH1192789
34.07225217 -117.4356723 MH1192694
34.03301977 -117.6110217 MH1192776
33.89542399 -117.1912384 MH1241051
33.74745614 -116.9907403 MH1303820
33.907012 -117.470026 MH1192741
33.627034 -117.166653 MH1272023
33.907012 -117.470026 MH1192821
33.787341 -116.379486 MH1203616
34.14095204 -117.2498498 MH1192758
33.93463154 -117.2437705 MH1192837
34.05951494 -117.2232483 MH1000216
33.837615 -116.546767 MH1259388
34.00011234 -117.720855 MH1269921
34.03301977 -117.6110217 MH1192743
34.50508314 -117.3265321 MH1192752
34.072652 -117.435669 MH1192695
33.976325 -117.330414 PI1032934
;

data inter;
  set have_2;
  do _i=1 to _nobs;
    set have_1 nobs=_nobs point=_i;
    distance=geodist(lat_1, long_1, lat_2, long_2, 'k');
    output;
  end;
run;

proc sort data=inter;
  by provider_pin distance;
run;

data want;
  set inter;
  by provider_pin distance;
  if first.provider_pin then n=1;
  else n+1;
  if n<=3 then output;
run;

Ready to join fellow brilliant minds for the SAS Hackathon?

Build your skills. Make connections. Enjoy creative freedom. Maybe change the world. Registration is now open through August 30th. Visit the SAS Hackathon homepage.

Register today!
How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 1 reply
  • 283 views
  • 1 like
  • 2 in conversation