I am running a KNN algorithm and would like to be able to extract the distance for each tested record. For example, if the model is trained on Dataset A and tested on Dataset B, what is the distance between a given record in Dataset B and its nearest neighbor in Dataset A? I am using PROC DISCRIM to execute this.
/* Create MASTER as an empty accumulator table (zero observations) with  */
/* two numeric columns, k and errorrate, ready to receive appended rows. */
data master;
    length k errorrate 8;
    /* Touch both variables so the log carries no "uninitialized" notes. */
    call missing(k, errorrate);
    /* End the step before the implicit OUTPUT so no row is written. */
    stop;
run;
/*---------------------------------------------------------------------------
 * KNND: sweep the neighbor count k of a nonparametric PROC DISCRIM
 *       classifier and record the test-set error rate for each k.
 *
 * For every odd k from 1 through 49 the macro:
 *   1. fits PROC DISCRIM (METHOD=NPAR, K=k) on TRAIN and scores TEST,
 *      writing the scored rows to _score<k>;
 *   2. computes the fraction of scored rows whose predicted class (_INTO_)
 *      differs from the actual class (BAD) into stage<k>;
 *   3. appends that single (k, errorrate) row to MASTER.
 *
 * Reads:   TRAIN, TEST (both need BAD and VAR1-VAR4), MASTER (append base).
 * Writes:  _score<k> and stage<k> for each k; appends to MASTER.
 *
 * NOTE(review): the TESTOUT= table is used here only for its _INTO_ column.
 * It does not appear to carry neighbor distances, so retrieving the
 * nearest-neighbor distance would need a separate computation - TODO confirm
 * against the PROC DISCRIM documentation.
 *--------------------------------------------------------------------------*/
%macro KNND;
    /* Keep the loop index private so an outer macro variable named i is  */
    /* never created or overwritten by this macro.                        */
    %local i;

    /* Odd values only (1, 3, ..., 49): stepping by 2 from 1 replaces the */
    /* former MOD(&i,2)=1 filter over 1..50 and yields the same k values. */
    %do i = 1 %to 50 %by 2;

        PROC DISCRIM
            DATA=TRAIN
            TESTDATA=TEST
            TESTOUT=_score&i
            METHOD=NPAR
            K=&i;
            CLASS BAD;
            VAR VAR1 VAR2 VAR3 VAR4;
        RUN;

        PROC SQL noprint;
            CREATE TABLE stage&i AS
            SELECT
                &i AS k,
                /* Misclassified rows divided by ALL scored rows. COUNT(*) */
                /* is used instead of COUNT(NIIN) so that rows with a      */
                /* missing NIIN cannot shrink the denominator and inflate  */
                /* the error rate.                                         */
                SUM(CASE WHEN BAD = _INTO_ THEN 0 ELSE 1 END) / COUNT(*) AS errorrate
            FROM _score&i;
        QUIT;

        /* Accumulate one (k, errorrate) row per iteration. */
        PROC APPEND BASE=MASTER DATA=stage&i;
        RUN;

    %end;
%mend KNND;

%KNND