I think this question is more complicated that you demostrator with this simple data.
I think it is searching a tree problem.
data xx;
length sub num rep $20;
sub = '001';num='1';rep = '';output;
sub = '001';num='2';rep = '';output;
sub = '001';num='3';rep = '002';output;
sub = '001';num='4';rep = '';output;
sub = '001';num='5';rep = '003';output;
sub = '001';num='6';rep = '';output;
sub = '002';num='1';rep = '';output;
sub = '002';num='2';rep = '';output;
sub = '002';num='3';rep = '';output;
sub = '002';num='4';rep = '001';output;
run;
data have;
set xx(where=(num is not missing and rep is not missing));
length from to $ 30;
from=catx('|',sub,input(rep,best.));
to=catx('|',sub,input(num,best.));
keep from to;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
/*****Find the ancestor/top one *******/
proc sql;
create table ancestor as
select node,max(grp) as grp from
(
select a.*,b.grp from want as a left join
(
select from,'grp'||put(input(scan(from,-1,'|'),best.),z10.) as grp from have where from not in (select to from have)
) as b
on a.node=b.from
) group by household;
quit;
data final_want;
if _n_=1 then do;
if 0 then set ancestor;
declare hash h(dataset:'ancestor');
h.definekey('node');
h.definedata('grp');
h.definedone();
end;
set xx;
length from to $ 30;
call missing(grp);
if not missing(rep) then do;
from=catx('|',sub,input(rep,best.));rc=h.find(key:from);
end;
if not missing(num) then do;
to=catx('|',sub,input(num,best.));rc=h.find(key:to);
end;
drop from to rc node;
run;
... View more