<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Create GROUPING variable based on values in multiple varaibles that link each other in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964246#M375544</link>
    <description>&lt;P&gt;I do not totally follow your example.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It kind of looks like you are using REP as a pointer to the observation with the matching value of NUM within the same SUB.&amp;nbsp; But you are doing some type of conversion since REP='002' is NOT the same string as NUM='2'.&amp;nbsp; Did you intend for NUM and REP to be numeric variables instead of character?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also why does REP='003' get GRP='grp002'?&amp;nbsp; Why not 'grp003'?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is your intent to treat the (NUM,REP) pairs as edges in a graph with the goal of find the connected sub-graphs?&amp;nbsp; If so then perhaps you want to use this&amp;nbsp;&lt;A href="https://github.com/sasutils/macros/blob/master/subnet.sas" target="_self"&gt;%SUBNET() macro&lt;/A&gt;&amp;nbsp;.&amp;nbsp; (Note you might need to enhance the macro to allow BY group processing.&amp;nbsp; Or split the data into separate datasets for each SUB.&amp;nbsp; Or convert your NUM and REP values into composite values that include the SUB id so they are unique.)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have ;
  input sub $ num rep;
cards;
001 1 . 
001 2 . 
001 3 002
001 4 . 
001 5 003
001 6 . 
002 1 . 
002 2 . 
002 3 . 
002 4 001
;

%subnet(in=have,out=subnet,from=rep,to=num,subnet=subnet,directed=1);

proc sql;
create table want as 
  select distinct a.*,b.subnet as grp
  from have a left join subnet b
    on a.sub = b.sub and (a.num=b.num or a.num=b.rep )
  order by 1,2
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Result&lt;/P&gt;
&lt;PRE&gt;Obs    sub    num    rep    grp

  1    001     1      .      .
  2    001     2      .      2
  3    001     3      2      2
  4    001     4      .      .
  5    001     5      3      2
  6    001     6      .      .
  7    002     1      .      1
  8    002     2      .      .
  9    002     3      .      .
 10    002     4      1      1
&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 14 Apr 2025 17:04:15 GMT</pubDate>
    <dc:creator>Tom</dc:creator>
    <dc:date>2025-04-14T17:04:15Z</dc:date>
    <item>
      <title>Create GROUPING variable based on values in multiple varaibles that link each other</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964213#M375539</link>
      <description>&lt;P&gt;Hi Experts,&amp;nbsp;&lt;/P&gt;&lt;P&gt;I m looking to create the grp varaible for records where the rep value linked with num value. Grp should get the first num value to link the group. THANK YOU SO MUCH FOR YOUR VALUEBLE TIME ON THIS.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Have this below dataset&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data xx;&lt;BR /&gt;length sub num rep $20;&lt;BR /&gt;sub = '001';num='1';rep = '';output;&lt;BR /&gt;sub = '001';num='2';rep = '';output;&lt;BR /&gt;sub = '001';num='3';rep = '002';output;&lt;BR /&gt;sub = '001';num='4';rep = '';output;&lt;BR /&gt;sub = '001';num='5';rep = '003';output;&lt;BR /&gt;sub = '001';num='6';rep = '';output;&lt;BR /&gt;sub = '002';num='1';rep = '';output;&lt;BR /&gt;sub = '002';num='2';rep = '';output;&lt;BR /&gt;sub = '002';num='3';rep = '';output;&lt;BR /&gt;sub = '002';num='4';rep = '001';output;&lt;BR /&gt;run;&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;sub&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;num&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;rep&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;003&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;and want the below dataset&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;sub&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;num&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;rep&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;grp&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;grp002&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;grp002&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;003&lt;/TD&gt;&lt;TD&gt;grp002&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;grp001&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;grp001&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;</description>
      <pubDate>Mon, 14 Apr 2025 13:50:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964213#M375539</guid>
      <dc:creator>Mr_sassy_sug</dc:creator>
      <dc:date>2025-04-14T13:50:13Z</dc:date>
    </item>
    <item>
      <title>Re: Create GROUPING variable based on values in multiple varaibles that link each other</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964217#M375540</link>
      <description>&lt;P&gt;something like this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data xx;
length sub num rep $20;
sub = '001';num='1';rep = '';output;
sub = '001';num='2';rep = '';output;
sub = '001';num='3';rep = '002';output;
sub = '001';num='4';rep = '';output;
sub = '001';num='5';rep = '003';output;
sub = '001';num='6';rep = '';output;
sub = '002';num='1';rep = '';output;
sub = '002';num='2';rep = '';output;
sub = '002';num='3';rep = '';output;
sub = '002';num='4';rep = '001';output;
run;

data want;
  do _N_ = 1 by 1 until(last.sub);
    set xx;
    by sub;
    min=min(input(rep,best32.),min);
  end;

  do _N_ = 1 to _N_;
    set xx;
    if num = min OR rep NE " " then grp = "grp"!!put(min,z3.);
                               else grp = " ";
    output;
  end;
run;
proc print;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Bart&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 14:23:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964217#M375540</guid>
      <dc:creator>yabwon</dc:creator>
      <dc:date>2025-04-14T14:23:48Z</dc:date>
    </item>
    <item>
      <title>Re: Create GROUPING variable based on values in multiple varaibles that link each other</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964246#M375544</link>
      <description>&lt;P&gt;I do not totally follow your example.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It kind of looks like you are using REP as a pointer to the observation with the matching value of NUM within the same SUB.&amp;nbsp; But you are doing some type of conversion since REP='002' is NOT the same string as NUM='2'.&amp;nbsp; Did you intend for NUM and REP to be numeric variables instead of character?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Also why does REP='003' get GRP='grp002'?&amp;nbsp; Why not 'grp003'?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is your intent to treat the (NUM,REP) pairs as edges in a graph with the goal of find the connected sub-graphs?&amp;nbsp; If so then perhaps you want to use this&amp;nbsp;&lt;A href="https://github.com/sasutils/macros/blob/master/subnet.sas" target="_self"&gt;%SUBNET() macro&lt;/A&gt;&amp;nbsp;.&amp;nbsp; (Note you might need to enhance the macro to allow BY group processing.&amp;nbsp; Or split the data into separate datasets for each SUB.&amp;nbsp; Or convert your NUM and REP values into composite values that include the SUB id so they are unique.)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Example:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have ;
  input sub $ num rep;
cards;
001 1 . 
001 2 . 
001 3 002
001 4 . 
001 5 003
001 6 . 
002 1 . 
002 2 . 
002 3 . 
002 4 001
;

%subnet(in=have,out=subnet,from=rep,to=num,subnet=subnet,directed=1);

proc sql;
create table want as 
  select distinct a.*,b.subnet as grp
  from have a left join subnet b
    on a.sub = b.sub and (a.num=b.num or a.num=b.rep )
  order by 1,2
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Result&lt;/P&gt;
&lt;PRE&gt;Obs    sub    num    rep    grp

  1    001     1      .      .
  2    001     2      .      2
  3    001     3      2      2
  4    001     4      .      .
  5    001     5      3      2
  6    001     6      .      .
  7    002     1      .      1
  8    002     2      .      .
  9    002     3      .      .
 10    002     4      1      1
&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 17:04:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964246#M375544</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2025-04-14T17:04:15Z</dc:date>
    </item>
    <item>
      <title>Re: Create GROUPING variable based on values in multiple varaibles that link each other</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964253#M375546</link>
      <description>&lt;P&gt;Thank you so much, you got it right and it is also working based on the situation, really appreciate your help.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 17:56:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964253#M375546</guid>
      <dc:creator>Mr_sassy_sug</dc:creator>
      <dc:date>2025-04-14T17:56:25Z</dc:date>
    </item>
    <item>
      <title>Re: Create GROUPING variable based on values in multiple varaibles that link each other</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964290#M375563</link>
      <description>&lt;P&gt;I think this question is more complicated that you demostrator with this simple data.&lt;/P&gt;
&lt;P&gt;I think it is searching a tree problem.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data xx;
length sub num rep $20;
sub = '001';num='1';rep = '';output;
sub = '001';num='2';rep = '';output;
sub = '001';num='3';rep = '002';output;
sub = '001';num='4';rep = '';output;
sub = '001';num='5';rep = '003';output;
sub = '001';num='6';rep = '';output;
sub = '002';num='1';rep = '';output;
sub = '002';num='2';rep = '';output;
sub = '002';num='3';rep = '';output;
sub = '002';num='4';rep = '001';output;
run;

data have;
set xx(where=(num is not missing and rep is not missing));
length from to $ 30;
from=catx('|',sub,input(rep,best.));
to=catx('|',sub,input(num,best.));
keep from to;
run;

data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;


/*****Find the ancestor/top  one *******/
proc sql;
create table ancestor as
select node,max(grp) as grp from
(


select a.*,b.grp from want as a left join 
(
select from,'grp'||put(input(scan(from,-1,'|'),best.),z10.) as grp from have where from not in (select to from have)
) as b
on a.node=b.from


) group by household;
quit;

data final_want;
if _n_=1 then do;
 if 0 then set ancestor;
 declare hash h(dataset:'ancestor');
 h.definekey('node');
 h.definedata('grp');
 h.definedone();
end;
set xx;
length from to $ 30;
call missing(grp);
if not missing(rep)  then do;
 from=catx('|',sub,input(rep,best.));rc=h.find(key:from);
end;
if not missing(num) then do;
 to=catx('|',sub,input(num,best.));rc=h.find(key:to);
end;
drop from to rc node;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Ksharp_0-1744702554479.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/106232iD8590FCCDE2CDFDC/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Ksharp_0-1744702554479.png" alt="Ksharp_0-1744702554479.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 15 Apr 2025 07:36:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Create-GROUPING-variable-based-on-values-in-multiple-varaibles/m-p/964290#M375563</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2025-04-15T07:36:04Z</dc:date>
    </item>
  </channel>
</rss>

