/*
It is fuzz matched problem.
*/
data have;
infile cards expandtabs;
input (Record FirstName LastName Gender) ($);
cards;
009 Jason Aman M
009 Jazon Aman M
100 Marshall Reid F
101 Reed Jones M
102 Aundo Kalen F
102 Aundy Kalen F
102 Jack Sarz M
110 Jack Sarz M
110 Kack Sarz M
;
proc sql;
create table k as
select a.Record,a.FirstName,b.FirstName as _FirstName,spedis(a.FirstName,b.FirstName) as dis
from have as a,have as b
where a.Record=b.Record and a.FirstName ne b.FirstName
;
quit;
data k2;
set k;
call sortc(FirstName,_FirstName);
run;
proc sort data=k2 nodupkey;by Record FirstName _FirstName;run;
data k3;
set k2;
if dis<50 then group=1;
else group=dis;
output;
if dis<50 then group=1;
else group=dis+1;
FirstName=_FirstName;output;
keep Record FirstName group;
run;
proc sort data=k3 out=k4 ;by Record FirstName group;run;
proc sort data=k4 nodupkey ;by Record FirstName;run;
proc sql;
create table temp as
select a.*,b.group
from have as a left join k4 as b
on a.Record=b.Record and a.FirstName=b.FirstName;
quit;
proc sort data=temp out=want nodupkey;
by Record group;
run;
... View more