I got the following HR data set with recursive IDs that all link back to an original ID. However, there are character variables that I also need to bring along with the recursive data set.
In the example below, the initial ID is 11, and the follow on ID 22 links via an originating ID 11 and so on. As you can see, they all link back to 11 via a recursive lookup.
I've used the chained lookup code here https://support.sas.com/kb/26/160.html, but can't get the variables to come along.
data have;
input id orig_id x1 $ x2 $ x3 $;
datalines;
11 . A B C
22 11 D E F
44 22 G H I
57 44 J K L
18 57 M N O
;
DATA WANT;
INPUT id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
11 A B C D E F G H I J K L M N O
;
RUN;
/*
You could combine 'employee_num' with 'id' and 'orig_id' to make a unique ID
*/
data x;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
20 88 . W T U
20 98 88 V Z Y
;
RUN;
data key;
set x;
k=1000000000*employee_num+orig_id;
x=x1; output;
x=x2; output;
x=x3; output;
k=1000000000*employee_num+id;
x=x1; output;
x=x2; output;
x=x3; output;
keep k x;
run;
data have;
set x(keep=employee_num id orig_id rename=(orig_id=from id=to));
from=1000000000*employee_num+from;
to=1000000000*employee_num+to;
drop employee_num;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing))
where from not in (select to from have where from is not missing);
create table final_want as
select distinct household,x
from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
on a.node=b.k
outer union corr
select household,put(mod(node,1000000000),best32. -l) as x,int(node/1000000000) as employee_num ,'ancenstor' as id from want where node in (select ancenstor from ancenstor)
order by 1,2
;
quit;
data final_want2;
merge final_want(drop=employee_num) final_want(keep=employee_num household where=(employee_num is not missing));
by household;
run;
I can elaborate more on the question by providing an employee number, the IDs represent a case num for the employee and are all distinct.
data have;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
2 88 . W T U
2 98 88 V Z Y
;
RUN;
DATA WANT;
INPUT employee_num id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
1 11 A B C D E F G H I J K L M N O
2 88 W T U V Z y . . . . . . . . .
;
RUN;
data x;
input id orig_id x1 $ x2 $ x3 $;
datalines;
11 . A B C
22 11 D E F
44 22 G H I
57 44 J K L
18 57 M N O
;
data key;
set x;
k=orig_id;
x=x1; output;
x=x2; output;
x=x3; output;
k=id;
x=x1; output;
x=x2; output;
x=x3; output;
keep k x;
run;
data have;
set x(keep=id orig_id rename=(orig_id=from id=to));
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing))
where from not in (select to from have where from is not missing);
create table final_want as
select distinct household,x
from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
on a.node=b.k
union
select household,put(node,best. -l) from want where node in (select ancenstor from ancenstor)
;
quit;
Will this work with my follow up example, sorry I changed the original example to include an employee_num, and pull the ID? I have yet to run through the code you supplied, but it's much appreciated.
data have;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
2 88 . W T U
2 98 88 V Z Y
;
RUN;
DATA WANT;
INPUT employee_num id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
1 11 A B C D E F G H I J K L M N O
2 88 W T U V Z y . . . . . . . . .
;
RUN;
/*
You could combine 'employee_num' with 'id' and 'orig_id' to make a unique ID
*/
data x;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
20 88 . W T U
20 98 88 V Z Y
;
RUN;
data key;
set x;
k=1000000000*employee_num+orig_id;
x=x1; output;
x=x2; output;
x=x3; output;
k=1000000000*employee_num+id;
x=x1; output;
x=x2; output;
x=x3; output;
keep k x;
run;
data have;
set x(keep=employee_num id orig_id rename=(orig_id=from id=to));
from=1000000000*employee_num+from;
to=1000000000*employee_num+to;
drop employee_num;
run;
data full;
set have end=last;
if _n_ eq 1 then do;
declare hash h();
h.definekey('node');
h.definedata('node');
h.definedone();
end;
output;
node=from; h.replace();
from=to; to=node;
output;
node=from; h.replace();
if last then h.output(dataset:'node');
drop node;
run;
data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();
if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
from_to.definekey('from');
from_to.definedata('to');
from_to.definedone();
if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
no.definekey('node');
no.definedata('node');
no.definedone();
do while(hi_no.next()=0);
household+1; output;
count=1;
key=node;_ha.add();
last=node;ha.add();
rc=hi.first();
do while(rc=0);
from=last;rx=from_to.find();
do while(rx=0);
key=to;ry=_ha.check();
if ry ne 0 then do;
node=to;output;rr=no.remove(key:node);
key=to;_ha.add();
count+1;
last=to;ha.add();
end;
rx=from_to.find_next();
end;
rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;
proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing))
where from not in (select to from have where from is not missing);
create table final_want as
select distinct household,x
from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
on a.node=b.k
outer union corr
select household,put(mod(node,1000000000),best32. -l) as x,int(node/1000000000) as employee_num ,'ancenstor' as id from want where node in (select ancenstor from ancenstor)
order by 1,2
;
quit;
data final_want2;
merge final_want(drop=employee_num) final_want(keep=employee_num household where=(employee_num is not missing));
by household;
run;
This is perfect, thanks so much!
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.