Thank you everyone for your help. I've managed to hack together a program that will work for my needs. I'm a little hesitant to run against my large data set because I don't think it's that efficient.. This is what I've come up with. Any comments/suggestions are welcome! Thanks again! Chris data have;
infile datalines dsd truncover;
input IDnum:$7. fcode:$4.;
datalines4;
4505895,E001
4520745,E001
4513005,E003
4577985,E003
4520745,E005
4505895,E006
4577985,E006
4513005,E010
4520745,E010
4577985,E010
4505895,E011
4513005,E011
4520745,E011
4577985,E011
4505895,E012
4513005,E012
4520745,E012
4505895,E016
4520745,E016
4505895,E017
4520745,E017
4577985,E017
4505895,E020
4520745,E020
4577985,E020
4505895,E022
4513005,E022
4520745,E022
4577985,E022
4505895,E023
4513005,E023
4520745,E023
4577985,E023
4520745,E024
4505895,E025
4577985,E102
5000001,E102
;;;;
/* generate distinct list of IDnums */
proc sql;
create table work.dlist as
select IDnum, count(fcode) as countcodes
from have
group by IDnum;
quit;
/* get the number of observations to loop through */
%let dsid=%sysfunc(open(work.dlist));
%let num=%sysfunc(attrn(&dsid, nobs));
%let rc=%sysfunc(close(&dsid));
/* build the IDnum comparison table */
%macro buildIDtable(IDnum=);
data work.curIDnum;
set work.have;
where IDnum= "&IDnum";
run;
%mend buildIDtable;
/* output the IDnum comparison results to the table */
%macro appendTable;
proc sql;
insert into work.need1
select a.IDnum as REFIDnum, b.IDnum as matchIDnum, b.fcode as matchFcode
from work.curIDnum a inner join work.have b
on a.IDnum <> b.idnum and a.fcode = b.fcode;
quit;
%mend appendTable;
/*copy values of IDnum to macro variables */
data _null_;
set work.dlist;
call symput('IDnum'|| put(IDnum,char(7)));
run;
/* call the dowork macro once for each IDnum */
%macro workloop;
%local x;
%do x=1 %to #
%buildIDtable(IDnum=&&IDnum&x);
%appendTable;
%end;
%mend workloop;
/* start the working loop for each IDnum in the have dataset */
%workloop;
/* count the number of matches by IDnum */
proc sql;
create table work.need2 as
select REFIDnum, matchIDnum, count(matchFcode) as countMatch
from work.need1
group by refIDnum, matchIDnum;
quit;
/* get the percent match by IDnum */
proc sql;
create table work.need3 as
select a.REFIDnum, a.matchIDnum, a.countMatch, b.countCodes as totalCodesM, a.countMatch/b.countCodes as percentMatch
from work.need2 a inner join work.dlist b
on a.matchIDnum=b.IDnum;
quit;
/* summarize final results */
proc sql;
create table work.want as
select REFIDnum, count(matchIDnum) as totalSimilar
from work.need3
where percentmatch> 0.33
group by REFIDnum;
quit;
... View more