DATA Step, Macro, Functions and more

comparing columns and creating new ones

Reply
N/A
Posts: 1

comparing columns and creating new ones

Hi all,

I have a little problem, maybe you can help me. So I have a dataset for example like this:

boss sub
Jim Kate
Lisa Jim
Dave Lisa
Dave Erik
Anna Mary
Anna Ben
Erik Anna
. Dave

and I should list all of the subordinates for each boss.

The table I would like to have shoud be something like this:

boss sub1 sub2 sub3 sub4 sub5 sub6 sub7
Jim Kate
Lisa Jim Kate
Dave Lisa Jim Kate Erik Anna Mary Ben
Anna Mary Ben
Erik Anna Mary Ben

Or even better like this:
boss sub
Jim Kate
Lisa Jim
Lisa Kate
Dave Lisa
Dave Jim
Dave Kate
Dave Erik
Dave Anna
Dave Mary
Dave Ben
Anna Mary
Anna Ben
Erik Anna
Erik Mary
Erik Ben

Maybe this would be ok too:

sub boss1 boss2 boss3 boss4
Kate Jim Lisa Dave
Jim Lisa Dave
Lisa Dave
Erik Dave
Mary Anna Erik Dave
Ben Anna Erik Dave
Dave

Do you have any tips, how to do this? Naturally, I don't know in advance how many boss/subordinates the data includes.. Thanks for any help
Super Contributor
Super Contributor
Posts: 3,174

Re: comparing columns and creating new ones

Look at using SAS PROC TRANSPOSE in the SAS PROCEDURES documentation; also you will find much supplemental technical / conference reference material on the SAS.COM support website and likely in these forum archives.

Scott Barry
SBBWorks, Inc.
Super User
Posts: 9,671

Re: comparing columns and creating new ones

I think your a little problem is very very complicated and very very tough!!!!!
Hope somebody can post a more simple code ,Especial for data _null_;
Where are you, @data _null_;????

[pre]



data temp(where=(boss is not missing));
input boss $ sub $;
cards;
Jim Kate
Lisa Jim
Dave Lisa
Dave Erik
Anna Mary
Anna Ben
Erik Anna
. Dave
;
run;

proc sort data=temp;
by boss;
run;

data temp_unique temp_dup;
set temp;
by boss;
if first.boss then output temp_unique;
else output temp_dup;
run;

data want;
declare hash ha (hashexp:10);
ha.definekey('_boss');
ha.definedata('sub');
ha.definedone();

do until(last);
set temp_unique(rename=(boss=_boss)) end=last;
ha.add();
end;

do until(_last);
set temp_unique end=_last;
output;
_boss=boss;
do until(rc ne 0);
rc=ha.find();
if rc = 0 then output;
_boss=sub;
end;
end;
drop _boss rc;
run;



%macro loop;
%let dsid=%sysfunc(open(temp_dup));
%let nobs=%sysfunc(attrn(&dsid,nobs));
%let dsid=%sysfunc(close(&dsid));
%put nobs=&nobs;
%do i=1 %to &nobs ;
data single_obs;
set temp_dup;
if _n_ eq &i;
run;

data temp_unique&i;
merge temp_unique single_obs;
by boss;
run;

data want&i;
declare hash ha (hashexp:10);
ha.definekey('_boss');
ha.definedata('sub');
ha.definedone();

do until(last);
set temp_unique&i(rename=(boss=_boss)) end=last;
ha.add();
end;

do until(_last);
set temp_unique&i end=_last;
output;
_boss=boss;
do until(rc ne 0);
rc=ha.find();
if rc = 0 then output;
_boss=sub;
end;
end;
drop _boss rc;
run;
proc append base=want data=want&i force;
run;
%end;

%mend loop;


%loop

%macro loopnext;
%let dsid=%sysfunc(open(temp_unique));
%let nobs=%sysfunc(attrn(&dsid,nobs));
%let dsid=%sysfunc(close(&dsid));
%put nobs=&nobs;
%do i=1 %to &nobs ;
data single_obs;
set temp_unique;
if _n_ eq &i;
run;

data temp_dup&i;
merge temp_dup single_obs;
by boss;
run;

data want&i;
declare hash ha (hashexp:10);
ha.definekey('_boss');
ha.definedata('sub');
ha.definedone();

do until(last);
set temp_dup&i(rename=(boss=_boss)) end=last;
ha.add();
end;

do until(_last);
set temp_dup&i end=_last;
output;
_boss=boss;
do until(rc ne 0);
rc=ha.find();
if rc = 0 then output;
_boss=sub;
end;
end;
drop _boss rc;
run;
proc append base=want data=want&i force;
run;
%end;

%mend loopnext;

%loopnext


proc sort data=want out=final_want nodupkey;
by boss sub;
run;





[/pre]


Ksharp
Occasional Contributor
Posts: 9

Re: comparing columns and creating new ones

Hi can propose following:

HTH,

Eric

---
data hier;
input boss: $32. sub:$32.;
CARDS;
Jim Kate
Lisa Jim
Dave Lisa
Dave Erik
Anna Mary
Anna Ben
Erik Anna
;
RUN;


%macro recur();

/*
we are speaking about recursive stuff so there will be issues if one boss has any (in)direct sub being his own boss!
one may add a maxIter argument
*/
%let lvl=1;
proc sql;
create table lvl1 as select * from hier order by sub;
quit;

proc sql noprint;
select count(*) into:nsub
from lvl&lvl. where sub ne '';
quit;

data all3;
set lvl1(rename=(sub=sub1));
run;

data all2; set lvl1; run;

%do %while(⊄ ne 0);
%let lvl=%eval(&lvl+1);
proc sql;
create table lvl&lvl. as
select distinct H.sub as boss, I.sub as sub
from lvl%eval(&lvl.-1) as H left join hier as I
on H.sub=i.boss
where H.sub ne ''
and H.sub ne i.sub /*careful then: do you have ID?*/
order by h.sub;
quit;

proc sql noprint;
select count(*) into:nsub
from lvl&lvl. where sub ne '';
quit;

%if &nsub. ne 0 %then %do;
proc sql;
create table all3 as
select * from all3 as a
left join lvl&lvl.(rename=(sub=sub&lvl.)) as b
on a.sub%eval(&lvl.-1)=b.boss;
quit;
proc append base=all2 data=all(where=(sub ne '') keep=boss sub&lvl. rename=(sub&lvl.=sub)); run;
proc sort data=all2 nodupkey; by boss sub; run;
%end;
%end;

proc print data=all3; run;
proc print data=all2; run;
%mend;
%recur();
---
Regular Contributor
Posts: 241

Re: comparing columns and creating new ones

Given that the data don't include cycles, this is an elementary exercise of traversing a tree (or trees). It is a pity that we cannot pass (hash) object references to user-written functions. If it were possible, then I would have written the recursive part as a user-written function. Here, I use the link blocks instead. The link blocks allow recursive "calls" up to 10 or 11 deep by default. But this limit can be relaxed by using the stack= option to the data statement. (This option is now documented in 9.2.) You need to create and manage call stacks by yourself, though.

   /* test data */
   data one;
      input boss $ sub $;
   cards;
   Jim Kate
   Lisa Jim
   Dave Lisa
   Dave Erik
   Anna Mary
   Anna Ben
   Erik Anna
   . Dave
   Mary Tom   <-- new data
   Mary Bill  <-- new data
   ;
   run;


   /* find all the subs */
   %let maxDepth = 100;
   data _null_ / stack=&maxDepth;

      /* main */
      link setup;
      do until (end);
         set one end=end;
         thisBoss = boss;
         link find;
      end;
      link out;
      return;

      /* helpers */
      setup:
         /* hashes */
         dcl hash in(dataset:"work.one", multidata:'y');
         in.definekey('boss');
         in.definedata('boss','sub');
         in.definedone();
         dcl hash out(ordered:'yes');
         out.definekey('boss','sub');
         out.definedata('boss','sub');
         out.definedone();

         /* const and global vars */
         retain OK 0;
         length thisBoss $8;

         /* call stack */
         array stack_boss[&maxDepth] $8 _temporary_;
         array stack_sub[&maxDepth] $8 _temporary_;
         array stack_rc[&maxDepth] _temporary_;
         array stack_i[&maxDepth] _temporary_;
         array stack_j[&maxDepth] _temporary_;
         stack_top = 0;
      return;

      add:
         out.add(key:thisboss, key:sub, data:thisboss, data:sub);
      return;

      find:
         link push;
         if missing(thisboss) | missing(sub) then goto outoffind; /* no point adding */
         if out.check(key:thisBoss, key:sub)=OK then goto outoffind; /* already there*/
         link add;
         /* follow */
         boss = sub;
         sub = '';
         /* find_next() gets reset as soon as another find() is called.
            thus we have to call find() and the number of find_next()
               for each sibling here. :-( */
         do i = 0 by 1 until(rc^=OK);
            rc = in.find();
            do j = 1 to i;
               rc = in.find_next();
            end;
            if rc = OK then link find;
         end;
         outoffind:
         link pop;
      return;

      pop:
         boss = stack_boss[top];
         sub = stack_sub[top];
         rc = stack_rc[top];
         i = stack_i[top];
         j = stack_j[top];
         top + (-1);
      return;

      push:
         top + 1;
         if top > &maxDepth then do;
            put "ERROR: stack overflow";
            stop;
         end;
         stack_boss[top] = boss;
         stack_sub[top] = sub;
         stack_rc[top] = rc;
         stack_i[top] = i;
         stack_j[top] = j;
      return;

      out:
         rc = out.output(dataset:"work.two");
      return;
   run;

   /* check */
   proc print data=two;
   run;
   /* on lst
   Obs    boss    sub
     1    Anna    Ben
     2    Anna    Bill
     3    Anna    Mary
     4    Anna    Tom
     5    Dave    Anna
     6    Dave    Ben
     7    Dave    Bill
     8    Dave    Erik
     9    Dave    Jim
    10    Dave    Kate
    11    Dave    Lisa
    12    Dave    Mary
    13    Dave    Tom
    14    Erik    Anna
    15    Erik    Ben
    16    Erik    Bill
    17    Erik    Mary
    18    Erik    Tom
    19    Jim     Kate
    20    Lisa    Jim
    21    Lisa    Kate
    22    Mary    Bill
    23    Mary    Tom
   */

Super User
Posts: 9,671

Re: comparing columns and creating new ones

Sorry.
My code above is wrong.But I am still fight for it because I am very very interested with your problem.The following is correct code I believed !!!!!!!


[pre]
data temp(where=(boss is not missing));
input boss $ sub $;
cards;
Jim Kate
Lisa Anna
Dave Lisa
Dave Erik
Dave Jim
Dave Peter
Anna Mary
Anna Ben
Anna Lisa
Erik Anna
Erik Dave
. Dave
;
run;

proc sort data=temp;
by boss;
run;

data temp_unique temp_dup;
set temp;
by boss;
if not(first.boss and last.boss) then output temp_dup;
else output temp_unique;
run;
data want;
declare hash ha (hashexp:10);
ha.definekey('_boss');
ha.definedata('sub');
ha.definedone();

do until(last);
set temp_unique(rename=(boss=_boss)) end=last;
ha.add();
end;

do until(_last);
set temp_unique end=_last;
output;
_boss=boss;
do until(rc ne 0 or count gt 10000);
/* 'count gt 10000' is to avoid dead loop ,Example: Dave->Jim Jim->Dave*/
count+1;
rc=ha.find();
if rc = 0 then output;
_boss=sub;
end;
end;
drop _boss rc;
run;



options mprint;
%macro all_comb;

proc sort data=temp_dup;
by boss;
run;
data order;
set temp_dup;
by boss;
retain start;
if first.boss then start= _n_;
if last.boss then do; end= _n_; output; end;
run;
data _null_;
set order end=last;
call symputx(cats('do',_n_),'do '||boss||'='||strip(start)||' to '||strip(end)||' ;' );
call symputx(cats('set',_n_),'set temp_dup point='||boss||'; output;');
call symputx(cats('end',_n_),'end;');
if last then call symputx('nobs',_n_);
run;

/* To get all the combination of boss who has duplicated value*/
data comb;
%do i=1 %to &nobs;
&&do&i
%end;
count+1;
%do j=1 %to &nobs;
&&set&j
%end;
%do k=1 %to &nobs;
&&end&k
%end;
stop;
run;

%mend all_comb;
%all_comb
data _null_;
set Comb end=last;
if last then call symputx('count',count);
run;
%put _user_;

%macro loop;
%do i=1 %to &count ;
data single;
set comb;
if count eq &i;
run;

data middle;
set temp_unique single;
run;

data want_middle;
declare hash ha (hashexp:10);
ha.definekey('_boss');
ha.definedata('sub');
ha.definedone();

do until(last);
set middle(rename=(boss=_boss)) end=last;
ha.add();
end;

do until(_last);
set middle end=_last;
output;
_boss=boss;
do until(rc ne 0 or count gt 10000);
count+1;
rc=ha.find();
if rc = 0 then output;
_boss=sub;
end;
end;
drop _boss rc;
run;
proc append base=want data=want_middle force;
run;
%end;

%mend loop;

%loop




proc sort data=want out=final_want(drop=count) nodupkey;
by boss sub;
run;







[/pre]


And I also consider an problem about your question
Namely, dead loop.For example: Dave->Jim Jim->Dave

Cheers!!!!
Ksharp

Message was edited by: Ksharp Message was edited by: Ksharp
Ask a Question
Discussion stats
  • 5 replies
  • 286 views
  • 0 likes
  • 5 in conversation