Here's a basic solution that works for small "families" (3 or less observations that share a common "survivor"). If you have larger families, you would need to to make this a macro with a do loop and keep running the sort & merge until no survivor swaps were made. If you have a circular association in a family, I worry that this would run infinitely.
DATA have;
INFILE DATALINES DSD;
INPUT NAME1 $ RECORDS1 NAME2 $ RECORDS2;
DATALINES;
TOM,5243,TOMMY,4
BRAD,873,BRADLEY,219
BRADLEY,219,BRAD,873
JOHN,61017,JOHNNY,905
JOHNNY,905,JOHN,61017
JONATHAN,500,JOHNNY,905
;
run;
/* name with highest count will become "survivor" and always stored in name1 */
/* move the more frequent name into name1 and less frequent name to name2 */
data have2;
set have;
if records2 > records1 then do;
tempname = name2;
temprecords = records2;
name2 = name1;
records2 = records1;
name1 = tempname;
records1 = temprecords;
end;
drop tempname temprecords;
run;
/* name2 is the less frequently used name; call it "nickname" */
/* drop out the redundant pairs */
proc sort data=have2 out=have3 nodupkey;
by name1 records1 name2 records2;
run;
/* create merge key */
data have3; set have3;
rename name1=namekey;
rename records1=recordskey;
run;
/* make a copy of the file and sort it on the name2 ("nickname") */
proc sort data=have3 out=have4;
by name2 records2 name1 records1;
run;
/* create a merge key */
data have4;
set have4;
rename name2=namekey;
rename records2=recordskey;
run;
/* merge dataset onto itself.
if nickname also appears in the table as a survivor name,
assign real survivor to nickname as new survivor (where nickname appears as survivor). */
data have5;
merge have3 (in=a) have4 (in=b);
by namekey;
if a and not b then do;
output;
end;
if a and b then do;
namekey = name3;
recordskey = records3;
output;
end;
rename namekey=name1;
rename recordskey=records1;
drop name3 records3;
run;
... View more