topic Re: How to find the list of the K nearest neighbors in Statistical Procedures

How to find the list of the K nearest neighbors

Hoccha — Wed, 05 Oct 2011 09:32:32 GMT

Hello,

I'm looking for a SAS procedure that finds the K nearest neighbors of a given point in a set of points.

In the following data step, I generate a random set of 100 observations.

/* Simulate 100 points in 3-D; each coordinate is an independent N(0,1) draw. */
data observations;
  length ID x1-x3 8;              /* pins the column order: ID, x1, x2, x3 */
  array coord{3} x1-x3;
  do ID = 1 to 100;
    /* fill x1, x2, x3 in that order, one draw per coordinate */
    do _k = 1 to 3;
      coord{_k} = rand('NORMAL', 0, 1);
    end;
    output;                       /* one row per ID */
  end;
  drop _k;                        /* loop index is not part of the data */
run;

For example, with this set of 100 observations, is there a procedure that finds the 10 nearest neighbors (by Euclidean distance) of the point [ 0.5 ; 0.5 ; 0.5 ]?

Please note that I have already found PROC DISCRIM, which applies a KNN classification. However, I want the list of the K nearest neighbors themselves, not a classification.

Thx!

Re: How to find the list of the K nearest neighbors

Tom — Wed, 05 Oct 2011 11:44:23 GMT

Why not just code it yourself?

/* Simulate 100 points in 3-D; each coordinate is an independent N(0,1) draw. */
data observations;
  do ID=1 TO 100;
    x1 = RAND('NORMAL',0,1);
    x2 = RAND('NORMAL',0,1);
    x3 = RAND('NORMAL',0,1);
    output;
  end;
run;

/* Euclidean distance from every observation to the target point (0.5, 0.5, 0.5). */
proc sql noprint ;
  create table distance as
    select distinct id,x1,x2,x3,sqrt((x1-y1)**2 + (x2-y2)**2 + (x3-y3)**2) as distance
    from observations
         /* one-row inline view holding the target coordinates y1-y3;
            the cross join attaches them to every observation */
       , (select 0.5 as y1,0.5 as y2, 0.5 as y3 from observations(obs=1) )
    /* ascending order puts the NEAREST points first; a descending sort
       would make the obs=10 step below keep the 10 farthest points */
    order by distance
  ;
quit;

/* Keep the first 10 rows of the sorted table = the 10 nearest neighbors. */
data top10;
  set distance (obs=10);
run;
proc print data=top10; run;

Re: How to find the list of the K nearest neighbors

PGStats — Wed, 05 Oct 2011 16:56:38 GMT

One quick way to find nearest neighbors in a large set of observations is PROC MODECLUS. Your specific problem could be solved by:

/* Simulate 100 points in 3-D; each coordinate is an independent N(0,1) draw. */
DATA observations;
do ID=1 TO 100;
   x1      = RAND('NORMAL',0,1);
   x2      = RAND('NORMAL',0,1);
   x3      = RAND('NORMAL',0,1);
output;
end;
run;

/* The target point, tagged with a sentinel ID (-999) that sorts before
   every real observation ID (1..100). */
DATA position;
ID = -999;
x1 = 0.5;
x2 = 0.5;
x3 = 0.5;
run;

/* Stack the target point on top of the observations. UNION ALL matches
   columns by position, so both tables must share the layout ID, x1, x2, x3. */
proc sql;
create table test as
(select * from position)
union all
(select * from observations)
order by ID;
quit;

/* Silence all ODS destinations while MODECLUS runs; only the ODS OUTPUT
   data set is wanted. */
ods _all_ close;
proc modeclus data=test dk=11 /* = 10 observations + 1 position */ neighbor;
var x1 x2 x3;
id ID;
ods output Neighbor=nTest;   /* capture the neighbor listing as a data set */
run;
ods listing;

/* Read only the first 10 rows of nTest.
   NOTE(review): this assumes the rows for ID = -999 come first in nTest
   (the input was sorted by ID) and that exactly 10 neighbors are listed
   for it - confirm against the MODECLUS Neighbor table layout. */
proc sql inobs=10;
select nBor as ID, distance from nTest;
quit;

Re: How to find the list of the K nearest neighbors

Hoccha — Thu, 06 Oct 2011 07:20:04 GMT

Hello Tom & PGStats,

many thanks for your help !

Based on Tom's message, I have built the following macro, "listOfKNN":

/* Simulate 100000 points in 3-D; each coordinate is an independent N(0,1) draw. */
data observations;
  ID = 0;
  do while (ID < 100000);
    ID = ID + 1;                     /* IDs run 1..100000 */
    x1 = rand('NORMAL', 0, 1);
    x2 = rand('NORMAL', 0, 1);
    x3 = rand('NORMAL', 0, 1);
    output;                          /* one row per ID */
  end;
run;

/*
 * listOfKNN - keep the K observations nearest (Euclidean distance) to a
 *             target point.
 *
 *   obser      : input data set; the coordinates must be stored in the
 *                variables x1, x2, ..., x<dim>
 *   target     : blank-separated coordinates of the target point,
 *                e.g. 0.5 1 1 (the number of values determines <dim>)
 *   K          : number of nearest neighbors to keep
 *   nameOutput : output data set; it has the columns of &obser (the
 *                working DISTANCE column is dropped), nearest row first
 */
%macro listOfKNN (obser , target, K, nameOutput);
    /* keep the work variables out of the caller's symbol table */
    %local dim ttt ii;

    %if %length(&target) = 0 %then %do;
        %put ERROR: listOfKNN - TARGET must contain at least one coordinate.;
        %return;
    %end;

    /* number of coordinates = number of blank-separated words; COUNTW is
       not fooled by repeated, leading or trailing blanks */
    %let dim = %sysfunc(countw(&target, %str( )));

    /* build the sum of squared differences:
       (x1-(t1))**2+(x2-(t2))**2+...
       each target value is parenthesized so a negative coordinate does
       not expand to "x1--0.5" */
    %let ttt =;
    %do ii = 1 %to &dim;
        %if &ii > 1 %then %let ttt = &ttt.+;
        %let ttt = &ttt.(x&ii.-(%scan(&target,&ii,%str( ))))**2;
    %end;

    /* distance of every observation to the target, nearest first */
    proc sql noprint ;
        create table &nameOutput as
         select distinct *, sqrt(&ttt) as distance
         from &obser
         order by distance;
    quit;

    /* keep the K nearest rows and drop the helper column */
    data &nameOutput;
        set &nameOutput (obs=&K);
        drop distance;
    run;
%mend listOfKNN;

/* Example run: the 5 observations nearest to the point (0.5, 1, 1),
   written to data set OUT. */
%listOfKNN(obser=observations, target=0.5 1 1, K=5, nameOutput=out);

B.R.

Olivier