Here is a different approach where splitting the input data set is avoided. I think it gives the wanted output.
Orphans are identified by a SQL join, where the input data set is joined by itself with match on user as as a join criteria, so it holds childs and parents with the same user together.
Ancestors are handled by a format to set parents on childs, and this it done in a loop, until the chain is exhausted. It makes use of extra variables, where user-base, resp user-cur, are concatenated, so user becomes part of the key.
/* test data 1 */
data have1;
format user 8. cur base 10.1;
user=0; base=5.1; cur=5.5; output;
user=0; base=5.2; cur=5.5; output;
user=0; base=5.0; cur=5.2; output;
user=0; base=4.7; cur=5.0; output;
user=0; base=4.6; cur=5.2; output;
user=0; base=.; cur=4.6; output;
user=2; base=5.4; cur=6.1; output;
user=2; base=5.3; cur=6.1; output;
user=2; base=5.0; cur=5.3; output;
user=2; base=4.7; cur=5.0; output;
user=2; base=4.5; cur=4.7; output;
run;
/* find orphans */
proc sql;
create table orphans as
select a.user, coalesce(a.base,a.cur) as orphan format=3.1
from have1 as a left join have1 as b
on a.user = b.user and (b.cur = a.base or b.cur = .)
where b.user = .;
quit;
/* test data 2 */
data have2;
format user 8. cur base 10.1;
user=1; base=4.1; cur=4.2; output; /* 4.1 to 4.2 - Ancestor here is 4.4 */
user=1; base=4.2; cur=4.4; output; /* 4.2 to 4.4 - Ancestor here is 4.4 */
user=1; base=4.6; cur=5.5; output; /* 4.6 to 5.5 - Ancestor here is 6.1 */
user=1; base=5.5; cur=6.1; output; /* 5.5 to 6.1 - Ancestor here is 6.1 */
user=2; base=4.2; cur=4.6; output; /* 4.2 to 4.6 - Ancestor here is 6.1 */
user=2; base=4.6; cur=5.5; output; /* 4.6 to 5.5 - Ancestor here is 6.1 */
user=2; base=5.5; cur=6.1; output; /* 5.0 to 6.1 - Ancestor here is 6.1 */
user=3; base=4.0; cur=4.2; output; /* 4.0 to 4.2 - Ancestor here is 5.5 */
user=3; base=4.2; cur=4.6; output; /* 4.2 to 4.6 - Ancestor here is 5.5 */
user=3; base=4.6; cur=5.5; output; /* 4.6 to 6.1 - Ancestor here is 5.5 */
run;
/* Find ancestors */
data w; set have2 end=eof;
type = 'C'; hlo=''; fmtname = 'par';
start = put(user,z8.)||'-'||put(base,z10.1);
label = put(user,z8.)||'-'||put(cur,z10.1);
output;
if eof then do;
hlo = 'O'; label='';
output;
end;
run;
proc format cntlin=w;
run;
data ancestors;
drop type hlo fmtname start label c w inter;
set w (where=(hlo=''));
format ancestor 10.1;
c = 0;
do until (w = .);
c = c + 1;
if c = 1 then inter = put(start,$par.); else inter = put(inter,$par.);
w = input(scan(inter,2,'-'),10.1);
if w ne . then ancestor = w;
end;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.