<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Match/Link with multiple passes in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418908#M67776</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13711"&gt;@art297&lt;/a&gt;&lt;/P&gt;&lt;P&gt;Amazing art297 as usual.&lt;/P&gt;&lt;P&gt;If I want mentors and students match on other variables a,b,c would I just be able to throw them into sql like below?&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create table distance(keep=mentor_id student_id distance) as
    select t1.*, 
    t1.user_x as x1,
    t1.user_y as y1,
    t2.user_x as x2,
    t2.user_y as y2,
	t2.student_id,
    geodist(x1, y1, x2, y2) as distance

      from mentor t1 join students t2
      on t1.a=t2.a and t1.b=t2.b and t1.c=t2.c  
        order by mentor_id, calculated distance
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Wed, 06 Dec 2017 18:28:51 GMT</pubDate>
    <dc:creator>Cruise</dc:creator>
    <dc:date>2017-12-06T18:28:51Z</dc:date>
    <item>
      <title>Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418243#M67730</link>
      <description>&lt;P&gt;I'd like to perform one-to-one mentor-to-student match/linkage. One mentor can take only one student and also only if nearest in their residential location.&lt;/P&gt;
&lt;P&gt;Program below works in this demo settings. However, have no idea how many passes would do the job in my real data where I have around N=500 mentors and N=3500 students. And this ratio can be substantially different for different counties (N=62). What would be more efficient programming dealing with multiple passes? (SAS 9.4)&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*MENTORS*/
data mentor;
 input mentor_ID $User_X User_Y;
 cards;
U1 53.8 -32.1 0 23 1
U2 23.8 -96.3 1 25 2
U3 34.5 -70.7 0 28 1
U4 28.7 -76.5 1 35 2
;
run;

/*STUDENTS*/
data students;
 input student_ID $ User_X	User_Y;
 cards;
A1 53.6 -32.1 1 63 1
A2 35.6 -12.3 0 25 2
A3 63.4 -85.4 1 69 1
A4 34.5 -70.9 0 45 2
A5 37.8 -77.7 1 55 1
;
run;

/*CALCULATION OF DISTANCE BETWEEN MENTORS AND STUDENTS*/
proc sql;
create table distance(keep=mentor_id student_id distance) as
select t1.*, 
    t1.user_x*1 as x1,
    t1.user_y*1 as y1,
    t2.user_x*1 as x2,
    t2.user_y*1 as y2,
	t2.student_id,
    geodist(calculated x1, calculated y1, calculated x2, calculated y2) as distance

from mentor t1, students t2
group by t1.mentor_id
having (calculated distance)=calculated distance;
quit; 

/*KEEP MINIMUM DISTANCE*/

/*minimum distance*/
proc sql;
create table dist_min(keep=mentor_id student_id distance) as
select t1.*, 
    t1.user_x*1 as x1,
    t1.user_y*1 as y1,
    t2.user_x*1 as x2,
    t2.user_y*1 as y2,
	t2.student_id,
    geodist(calculated x1, calculated y1, calculated x2, calculated y2) as distance

from mentor t1, students t2
group by t1.mentor_id
having min(calculated distance)=calculated distance;
quit;

/*CHECK OUTPUTS*/
proc print data=distance; run;
proc print data=dist_min; run; 

/*MENTOR TO STUDENT / ONE-TO-ONE MATCHES*/;
/**pass1 */
proc sort data=dist_min;
by student_id distance;
run;
data dist_min_keep; set dist_min;
by student_id distance;
if first.student_id then output;
run;

/*MENTORS MATCHED TO MANY STUDENTS*/
*PASS2;
proc sort data=mentor; by mentor_id; run;
proc sort data=dist_min_keep; by mentor_id distance; run;
data mentor2(where=(distance = .));
merge mentor (in=a) dist_min_keep (in=b);
by mentor_id;
drop student_id;
if a;
run;
data mentor2a(drop=distance); set mentor2;
run;

/*STUDENTS MATCHED TO SAME MENTORS*/
proc sort data=students; by student_id; run;
proc sort data=dist_min_keep; by student_id; run;
data student2(where=(distance=.));
merge students (in=a) dist_min_keep (in=b);
by student_id;
drop mentor_id;
if a;
run;
data student2a(drop=distance); set student2;
run;

*PASS2;
/*CALCULATE AND KEEP MINIMUM DISTANCE AMONG NOT DISTINCTLY MATCHED MENTORS AND STUDENTS FROM THE FIRST PASS */
proc sql;
create table dist_2p (keep=mentor_id student_id distance) as
select
    t1.user_x*1 as x1,
    t1.user_y*1 as y1,
	t1.mentor_id,
    t2.user_x*1 as x2,
    t2.user_y*1 as y2,
	t2.student_id,
    geodist(calculated x1, calculated y1, calculated x2, calculated y2) as distance

from mentor2a t1, student2a t2
group by t1.mentor_id
having min(calculated distance)=calculated distance;
quit;

data final_matches; set Dist_min_keep dist_2p; run;  

proc print data=mentor;
proc print data=students;
proc print data=final_matches; run; &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 10 Sep 2018 21:36:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418243#M67730</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2018-09-10T21:36:35Z</dc:date>
    </item>
    <item>
      <title>Re: Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418256#M67734</link>
      <description>&lt;P&gt;Your input datasteps don't include the other three variables and you are multiplying a number by 1 in your proc sql call.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Are you trying to do something like the following?:&lt;/P&gt;
&lt;PRE&gt;/*MENTORS*/
data mentor;
 input mentor_ID $User_X User_Y a b c;
 cards;
U1 53.8 -32.1 0 23 1
U2 23.8 -96.3 1 25 2
U3 34.5 -70.7 0 28 1
U4 28.7 -76.5 1 35 2
;

/*STUDENTS*/
data students;
 input student_ID $ User_X	User_Y a b c;
 cards;
A1 53.6 -32.1 1 63 1
A2 35.6 -12.3 0 25 2
A3 63.4 -85.4 1 69 1
A4 34.5 -70.9 0 45 2
A5 37.8 -77.7 1 55 1
;

proc sql;
  create table distance(keep=mentor_id student_id distance) as
    select t1.*, 
    t1.user_x as x1,
    t1.user_y as y1,
    t2.user_x as x2,
    t2.user_y as y2,
	t2.student_id,
    geodist(x1, y1, x2, y2) as distance

      from mentor t1, students t2
        order by mentor_id, calculated distance
/*         group by t1.mentor_id */
/*           having (calculated distance)=calculated distance */
  ;
quit;

data want (keep=mentor_id student_id distance);
  array students(999) $;
  set distance;
  retain i needone students:;
  by mentor_id;
  if _n_ eq 1 then i=0;
  if first.mentor_id then needone=1;
  if needone then do;
    if student_id not in students then do;
      i+1;
      needone=0;
      students(i)=student_id;
      output;
    end;
  end;
run; 
&lt;/PRE&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;</description>
      <pubDate>Mon, 04 Dec 2017 19:06:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418256#M67734</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-12-04T19:06:22Z</dc:date>
    </item>
    <item>
      <title>Re: Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418908#M67776</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13711"&gt;@art297&lt;/a&gt;&lt;/P&gt;&lt;P&gt;Amazing art297 as usual.&lt;/P&gt;&lt;P&gt;If I want mentors and students match on other variables a,b,c would I just be able to throw them into sql like below?&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create table distance(keep=mentor_id student_id distance) as
    select t1.*, 
    t1.user_x as x1,
    t1.user_y as y1,
    t2.user_x as x2,
    t2.user_y as y2,
	t2.student_id,
    geodist(x1, y1, x2, y2) as distance

      from mentor t1 join students t2
      on t1.a=t2.a and t1.b=t2.b and t1.c=t2.c  
        order by mentor_id, calculated distance
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 06 Dec 2017 18:28:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418908#M67776</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2017-12-06T18:28:51Z</dc:date>
    </item>
    <item>
      <title>Re: Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418918#M67777</link>
      <description>&lt;P&gt;I would think so, but would need some example data in order to test it.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 06 Dec 2017 18:57:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418918#M67777</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-12-06T18:57:25Z</dc:date>
    </item>
    <item>
      <title>Re: Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418929#M67778</link>
      <description>&lt;P&gt;Please see data as suggested. I think it worked out. But I'm curious if any comments.&lt;/P&gt;&lt;P&gt;Does it below linkage make sense if i needed impose certain conditions on the linkage?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13711"&gt;@art297&lt;/a&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*MENTORS*/
data mentor;
 input mentor_ID $User_X User_Y group age gender;
 cards;
U1 53.8 -32.1 0 23 1
U2 23.8 -96.3 1 25 2
U3 34.5 -70.7 0 28 1
U4 28.7 -76.5 1 35 2
;

/*STUDENTS*/
data students;
 input student_ID $ User_X	User_Y group age gender;
 cards;
A1 53.6 -32.1 1 63 1
A2 35.6 -12.3 0 25 2
A3 63.4 -85.4 1 69 1
A4 34.5 -70.9 0 45 2
A5 37.8 -77.7 1 55 1
;

proc sql;
  create table distance(drop=x1 x2 y1 y2 user_x user_y) as
    select t1.*, 
    t1.user_x as x1,
    t1.user_y as y1,
    t2.user_x as x2,
    t2.user_y as y2,
	t2.student_id,
	t2.age as student_age,
	t2.gender as student_gender,
	t2.group as student_group,
    geodist(x1, y1, x2, y2) as distance

      from mentor t1, students t2
        order by mentor_id, calculated distance
/*         group by t1.mentor_id */
/*           having (calculated distance)=calculated distance */
  ;
quit;

data want(keep=mentor_ID group age gender student_ID distance student_age student_gender student_group);
  array students(999) $;
  set distance;
  retain i needone students:;
  by mentor_id;
  if _n_ eq 1 then i=0;
  if first.mentor_id then needone=1;
  if needone then do;
    if student_id not in students then do;
      i+1;
      needone=0;
      students(i)=student_id;
      output;
    end;
  end;
run; 


/*MENTORS*/
data mentor;
 input mentor_ID $User_X User_Y group age gender;
 cards;
U1 53.8 -32.1 0 23 1
U2 23.8 -96.3 1 25 2
U3 34.5 -70.7 0 28 1
U4 28.7 -76.5 1 35 2
;

/*STUDENTS*/
data students;
 input student_ID $ User_X	User_Y group age gender;
 cards;
A1 53.6 -32.1 1 63 1
A2 35.6 -12.3 0 25 2
A3 63.4 -85.4 1 35 2
A4 34.5 -70.9 0 45 2
A5 37.8 -77.7 1 25 2
;

proc sql;
  create table distance1 as
    select t1.*, 
    t1.user_x as x1,
    t1.user_y as y1,
    t2.user_x as x2,
    t2.user_y as y2,
	t2.student_id,
	t2.age as student_age,
	t2.gender as student_gender,
	t2.group as student_group,
    geodist(x1, y1, x2, y2) as distance

      from mentor t1 join students t2
	  on t1.age=t2.age and t1.group=t2.group and t1.gender=t2.gender 
        order by mentor_id, calculated distance
/*         group by t1.mentor_id */
/*           having (calculated distance)=calculated distance */
  ;
quit;

data want1 (keep=mentor_id student_id distance age group gender student_age student_gender student_group);
  array students(999) $;
  set distance1;
  retain i needone students:;
  by mentor_id;
  if _n_ eq 1 then i=0;
  if first.mentor_id then needone=1;
  if needone then do;
    if student_id not in students then do;
      i+1;
      needone=0;
      students(i)=student_id;
      output;
    end;
  end;
run; 

proc print data=want;
title "Linked by nearest distance";
run;

proc print data=want1;
title "Linked by nearest distance given other conditions met";
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 06 Dec 2017 19:49:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418929#M67778</guid>
      <dc:creator>Cruise</dc:creator>
      <dc:date>2017-12-06T19:49:30Z</dc:date>
    </item>
    <item>
      <title>Re: Match/Link with multiple passes</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418934#M67779</link>
      <description>&lt;P&gt;Not enough data to give it a sufficient test but, again, I don't see why it wouldn't work with the conditions you added.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;With your example, it selects the two pairs that match.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 06 Dec 2017 20:11:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Match-Link-with-multiple-passes/m-p/418934#M67779</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-12-06T20:11:25Z</dc:date>
    </item>
  </channel>
</rss>

