Hi @KonstantinVasil
I tried to follow @Astounding's idea with a format and came to the following solution. It expects input to be a hierarchy, i.e. any node can have only one parent-node, but several child-notes, and I added an extra line to input, so c->d and c->e to make an extra child-node.
data id_lists;
input prev_id $ next_id $;
datalines;
a b
w x
b c
x y
c d
c e
;
* Check input - any node can have only one parent-node;
* A hierarchy with more than one child-node to a parent-node is supported;
proc sql;
create table a as
select next_id, count(*) as antal from id_lists
group by next_id
having antal > 1;
quit;
%if &sqlobs = 0 %then %do;
* Add originating parent-nodes as their own parents;
* Done because this is part of the wanted output (a-a, w-w);
proc sql;
create table self_origin as
select distinct prev_id, prev_id as next_id
from id_lists
where
prev_id ne '' and
prev_id not in (select next_id from id_lists);
quit;
data have; set id_lists self_origin;
run;
* Build a format to translate node to parent-node;
data fmt; set have (rename=(next_id=start prev_id=label));
retain fmtname '$parent';
run;
proc format cntlin=fmt;
run;
* Translate child-note to parent-node until translation is exhausted;
data a (drop=prev_id oldorig); set have (rename=(next_id=id_sequence));
sequence_origin = put(id_sequence,$parent.);
do until (sequence_origin = '*');
oldorig = sequence_origin;
sequence_origin = put(sequence_origin,$parent.);
if oldorig ne '' and oldorig = sequence_origin then do;
output;
leave;
end;
end;
run;
* sort output in same order as wanted, until now no sorting was done;
proc sort data=a; by sequence_origin id_sequence;
run;
%end;
%else %do;
%put Sorry - input data is not a hierarchy;
%end;
The output is:
... View more