Solved: Re: Recursive or chained lookup with a data twist

HR_coder123 · Posted 05-17-2023 09:17 PM

I got the following HR data set with recursive IDs that all link back to an original ID. However, there are character variables that I also need to bring along with the recursive data set.

In the example below, the initial ID is 11, and the follow on ID 22 links via an originating ID 11 and so on. As you can see, they all link back to 11 via a recursive lookup.

I've used the chained lookup code here https://support.sas.com/kb/26/160.html, but can't get the variables to come along.

data have;
input id orig_id x1 $ x2 $ x3 $;
datalines;
11 . A B C
22 11 D E F
44 22 G H I
57 44 J K L
18 57 M N O
;

DATA WANT;
INPUT id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
11 A B C D E F G H I J K L M N O
;
RUN;

Ksharp · Posted 05-19-2023 07:31 AM

/*
You could combine 'employee_num' with 'id' and 'orig_id' to make a unique ID
*/
data x;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
20 88 . W T U
20 98 88 V Z Y
;
RUN;

data key;
 set x;
 k=1000000000*employee_num+orig_id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;

 k=1000000000*employee_num+id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;
 keep k x;
run;




data have;
 set x(keep=employee_num id orig_id rename=(orig_id=from id=to));
 from=1000000000*employee_num+from; 
 to=1000000000*employee_num+to; 
 drop employee_num;
run;

data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;

data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;


proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing)) 
  where from not in (select to from have where from is not missing);

create table final_want as

select distinct household,x
 from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
  on a.node=b.k
outer union corr
select household,put(mod(node,1000000000),best32. -l) as x,int(node/1000000000) as employee_num ,'ancenstor' as id from want where node in (select ancenstor from ancenstor)
order by 1,2
;
quit;

data final_want2;
 merge final_want(drop=employee_num) final_want(keep=employee_num household where=(employee_num is not missing));
 by household;
run;

View solution in original post

HR_coder123 · Posted 05-18-2023 08:10 AM

I can elaborate more on the question by providing an employee number, the IDs represent a case num for the employee and are all distinct.

data have;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
2 88 . W T U
2 98 88 V Z Y
;
RUN;

DATA WANT;
INPUT employee_num id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
1 11 A B C D E F G H I J K L M N O
2 88 W T U V Z y . . . . . . . . .
;
RUN;

Ksharp · Posted 05-18-2023 08:57 AM

data x;
input id orig_id x1 $ x2 $ x3 $;
datalines;
11 . A B C
22 11 D E F
44 22 G H I
57 44 J K L
18 57 M N O
;

data key;
 set x;
 k=orig_id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;

 k=id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;
 keep k x;
run;




data have;
 set x(keep=id orig_id rename=(orig_id=from id=to));
run;

data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;


proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing)) 
  where from not in (select to from have where from is not missing);

create table final_want as

select distinct household,x
 from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
  on a.node=b.k
union
select household,put(node,best. -l) from want where node in (select ancenstor from ancenstor)
;
quit;

HR_coder123 · Posted 05-18-2023 10:02 AM

Will this work with my follow up example, sorry I changed the original example to include an employee_num, and pull the ID? I have yet to run through the code you supplied, but it's much appreciated.

data have;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
2 88 . W T U
2 98 88 V Z Y
;
RUN;

DATA WANT;
INPUT employee_num id Y1 $ Y2 $ Y3 $ Y4 $ Y5 $ Y6 $ Y7 $ Y8 $ Y9 $ Y10 $ Y11$ Y12 $ Y13$ Y14 $ Y15 $;
datalines;
1 11 A B C D E F G H I J K L M N O
2 88 W T U V Z y . . . . . . . . .
;
RUN;

Ksharp · Posted 05-19-2023 07:31 AM

/*
You could combine 'employee_num' with 'id' and 'orig_id' to make a unique ID
*/
data x;
input employee_num id orig_id x1 $ x2 $ x3 $;
datalines;
1 11 . A B C
1 22 11 D E F
1 44 22 G H I
1 57 44 J K L
1 18 57 M N O
20 88 . W T U
20 98 88 V Z Y
;
RUN;

data key;
 set x;
 k=1000000000*employee_num+orig_id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;

 k=1000000000*employee_num+id; 
 x=x1; output;
 x=x2; output;
 x=x3; output;
 keep k x;
run;




data have;
 set x(keep=employee_num id orig_id rename=(orig_id=from id=to));
 from=1000000000*employee_num+from; 
 to=1000000000*employee_num+to; 
 drop employee_num;
run;

data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;

data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;


proc sql ;
create table ancenstor as
select from as ancenstor from have(where=(from is not missing)) 
  where from not in (select to from have where from is not missing);

create table final_want as

select distinct household,x
 from want(where=(node is not missing)) as a left join key(where=(k is not missing)) as b
  on a.node=b.k
outer union corr
select household,put(mod(node,1000000000),best32. -l) as x,int(node/1000000000) as employee_num ,'ancenstor' as id from want where node in (select ancenstor from ancenstor)
order by 1,2
;
quit;

data final_want2;
 merge final_want(drop=employee_num) final_want(keep=employee_num household where=(employee_num is not missing));
 by household;
run;

HR_coder123 · Posted 05-19-2023 09:42 AM

This is perfect, thanks so much!

Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

Re: Recursive or chained lookup with a data twist

SAS Innovate 2025: Register Now

SAS Training: Just a Click Away