<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to find link between nodes in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341858#M272785</link>
    <description>&lt;P&gt;Thanks for your reply. but your program is finding the link between node which is not the interests of mine. &lt;BR /&gt;I am seeking for the application_id that linked to address_id (I specified) either directly or indirectly.&lt;/P&gt;</description>
    <pubDate>Fri, 17 Mar 2017 04:35:27 GMT</pubDate>
    <dc:creator>gyambqt</dc:creator>
    <dc:date>2017-03-17T04:35:27Z</dc:date>
    <item>
      <title>How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341819#M272781</link>
      <description>&lt;P&gt;Hi Experts,&lt;/P&gt;
&lt;P&gt;I want to find all the links&amp;nbsp;that are connected to&amp;nbsp;the address_ID I specified directly or indirectly (or any other nodeID, like email_ID, IP_ID&amp;nbsp;etc.) &amp;nbsp;and return only application_ID. for example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Application_ID&amp;nbsp;&amp;nbsp; Email_ID&amp;nbsp;&amp;nbsp;IP_ID&amp;nbsp;Address_ID phone_ID&lt;/P&gt;
&lt;P&gt;1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;email1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;ip1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;address1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;phone1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;email2&amp;nbsp;&amp;nbsp;&amp;nbsp; ip2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; address1&amp;nbsp;&amp;nbsp;&amp;nbsp; phone2&lt;/P&gt;
&lt;P&gt;3&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; email3&amp;nbsp;&amp;nbsp; ip2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;address2&amp;nbsp;&amp;nbsp;&amp;nbsp; phone5&lt;/P&gt;
&lt;P&gt;4&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; email5&amp;nbsp;&amp;nbsp;&amp;nbsp; ip1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; address3&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; phone13&lt;/P&gt;
&lt;P&gt;5&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; email1&amp;nbsp; ip13&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; address13&amp;nbsp; phone13&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;if I specify parameter address_id=address1&lt;/P&gt;
&lt;P&gt;then the program should be able to return all 5 applications because:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;address1 is connected to ip1 and ip2 from application_id 1 and application_id 2 so both application should be returned in the final output&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;address1 is connected to&amp;nbsp;ip1, which is also&amp;nbsp;connected to application_id 4, so application_id 4 should be returned in the final output&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;address1 is connected to ip2, which is connected to application_id 3, so application_id 3 should be returned in the final output&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;address1 is connected to email1, which is connected to application_id 5, so application_id 5 should be returned in the final output&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have used a PROC SQL join to handle this problem, but it seems inefficient because I need to specify the number of times the iteration should run.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I want to achieve the outcome using hash table if possible!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I hope I made it clear.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 00:07:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341819#M272781</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-17T00:07:28Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341842#M272782</link>
      <description>&lt;P&gt;Build the set of all connections, then use the subgraphs macro described &lt;A href="https://communities.sas.com/t5/General-SAS-Programming/Finding-all-connected-components-in-a-graph/m-p/230156#M33789" target="_self"&gt;here&lt;/A&gt;. It is a hash solution to this problem.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 03:02:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341842#M272782</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-03-17T03:02:42Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341849#M272783</link>
      <description>&lt;PRE&gt;
Since those four variables are independent respectively,
it is easy for SQL.



/* Sample data: one row per application. Application_ID is read as a numeric
   value; the four identifier columns are read as character values with the
   $20. informat (modified list input, so the ragged spacing is harmless). */
data have;
input Application_ID   (Email_ID  IP_ID Address_ID phone_ID) ( :$20.);
cards;
1                            email1    ip1       address1     phone1    
2                            email2    ip2        address1    phone2
3                            email3   ip2         address2    phone5
4                            email5    ip1         address3     phone13
5                            email1  ip13       address13  phone13
;
run;

/* Address whose linked applications are requested. */
%let add=address1;

/* One-hop lookup: an application qualifies when it shares an e-mail, an IP
   or a phone with any row that carries the requested address. The three
   branches are combined with UNION, which also removes duplicate ids. */
proc sql;
  select a.application_id
    from have as a
    where a.email_id in
      (select s.email_id from have as s where s.address_id="&amp;amp;add")
  union
  select a.application_id
    from have as a
    where a.ip_id in
      (select s.ip_id from have as s where s.address_id="&amp;amp;add")
  union
  select a.application_id
    from have as a
    where a.phone_id in
      (select s.phone_id from have as s where s.address_id="&amp;amp;add")
  ;
quit;
  
  

&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 Mar 2017 03:31:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341849#M272783</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-17T03:31:48Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341855#M272784</link>
      <description>&lt;P&gt;Hi Ksharp,&lt;/P&gt;
&lt;P&gt;Thanks for your reply but I think your code only return part of the node that application linked to, it can go further than that.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;like the following example:&lt;/P&gt;
&lt;P&gt;Application_ID Email_ID IP_ID Address_ID phone_ID&lt;/P&gt;
&lt;P&gt;1 email1 ip1 address1 phone1&lt;/P&gt;
&lt;P&gt;2 email2 ip1 address2 phone2&lt;/P&gt;
&lt;P&gt;3 email2 ip2 address3 phone3&lt;/P&gt;
&lt;P&gt;you code only return application_id 1 and application_id 2&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;but if you look at the dataset address1 from application_id 1 is linked to application_id 2 via ip1 and&amp;nbsp;application_id 2 is linked to application_id 3 via email2 so application_id 1 is linked to application_id 3 indirectly&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 04:14:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341855#M272784</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-17T04:14:23Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341858#M272785</link>
      <description>&lt;P&gt;Thanks for your reply. but your program is finding the link between node which is not the interests of mine. &lt;BR /&gt;I am seeking for the application_id that linked to address_id (I specified) either directly or indirectly.&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 04:35:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341858#M272785</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-17T04:35:27Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341860#M272786</link>
      <description>&lt;P&gt;Replace &amp;lt;= by &amp;lt; if you don't want clusters with a single ID&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Sample data: Application_ID is numeric (length 8); the four identifier
   columns are character, length 12. Row 6 shares no identifier value with
   any other row. */
length Application_ID 8 Email_ID IP_ID Address_ID phone_ID $12;
input Application_ID   Email_ID  IP_ID Address_ID phone_ID;
datalines;
1                            email1    ip1       address1     phone1    
2                            email2    ip2        address1    phone2
3                            email3   ip2         address2    phone5
4                            email5    ip1         address3     phone13
5                            email1  ip13       address13  phone13
6                            email9  ip19       address19  phone19
;

/* Build ARCS: one row per pair of applications that share at least one
   identifier value. The comparison on application_ID keeps each unordered
   pair once and also keeps the self-pair of every application. */
proc sql;
  create table arcs as
  select l.application_ID as from,
         r.application_ID as to
  from have as l, have as r
  where l.application_ID &amp;lt;= r.application_ID
    and (   l.Email_ID   = r.Email_ID
         or l.IP_ID      = r.IP_ID
         or l.Address_ID = r.Address_ID
         or l.phone_ID   = r.phone_ID);
quit;

/* Load the subgraphs macro source. The folder comes from the macro variable
   sasforum, which is not set anywhere in this snippet and must be defined
   by the caller beforehand. */
%include "&amp;amp;sasforum\subgraphsmacro.sas";

/* Run the macro on the ARCS edge table built above. */
%subgraphs(arcs);

/* NOTE(review): CLUSTERS is presumably the dataset written by the subgraphs
   macro; confirm against the macro source. */
proc print data=clusters; run;

&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 Mar 2017 04:40:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341860#M272786</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-03-17T04:40:19Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341862#M272787</link>
      <description>&lt;P&gt;Hi PG,&lt;/P&gt;
&lt;P&gt;I think your program only find direct relationship between two applications for a specified address_id. but it failed to find the indirect relationship like the following data.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1 email1 ip1 address1 phone1&lt;/P&gt;
&lt;P&gt;2 email2 ip1 address2 phone2&lt;/P&gt;
&lt;P&gt;3 email2 ip2 address3 phone3&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1 is having relationship with 3 if address1&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="token punctuation"&gt;another question is in the subgraph macro, it has a macro parameter&amp;amp;colon and it is not defined anywhere.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="token punctuation"&gt;how do I find this value.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="token punctuation"&gt;thanks&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 05:08:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341862#M272787</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-17T05:08:15Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341919#M272788</link>
      <description>&lt;PRE&gt;
OK. I see what is different now.
Try this one.





/* Sample data: one row per application. Application_ID is numeric; the four
   identifier columns are character, read with the $20. informat.
   Rows 11 to 13 form a separate chain: 11 and 12 share ip21, 12 and 13
   share email22. */
data x;
input Application_ID   (Email_ID  IP_ID Address_ID phone_ID) ( :$20.);
cards;
1                            email1    ip1       address1     phone1    
2                            email2    ip2        address1    phone2
3                            email3   ip2         address2    phone5
4                            email5    ip1         address3     phone13
5                            email1  ip13       address13  phone13
11 email21 ip21 address21 phone21
12 email22 ip21 address22 phone22
13 email22 ip22 address23 phone23
;
run;

/* Turn every row of X into an edge list: one (from, to) row for each
   unordered pair of its four identifier values. */
data have(keep=from to);
  set x;
  array ids{*} $ Email_ID  IP_ID Address_ID phone_ID;
  length from to $ 100;
  do a = 1 to dim(ids) - 1;
    from = ids{a};
    do b = a + 1 to dim(ids);
      to = ids{b};
      output;
    end;
  end;
run;



/* Make the edge list symmetric (each edge of HAVE is written in both
   directions) and collect every distinct node value into dataset NODE. */
data full;
  set have end=last;
  if _n_ eq 1 then do;
   /* h stores each node value once: NODE is both the key and the data. */
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  /* Write the edge as read, then register its FROM end as a node. */
  output;
  node=from; h.replace();
  /* Swap the two ends (NODE still holds the original FROM), write the
     reversed edge, then register the original TO end as a node. */
  from=to; to=node;
  output;
  node=from; h.replace();
  /* After the last edge, dump the collected node values to dataset NODE. */
  if last then h.output(dataset:'node');
  drop node;
run;


/* Assign a group number (HOUSEHOLD) to every node: starting from each node
   that has not been reached yet, follow the edges in FULL and write every
   node reached with the same HOUSEHOLD value. Output: one row per node. */
data want(keep=node household);
/* ha: nodes waiting to be expanded, keyed by an increasing counter and read
   in ascending key order through iterator hi. */
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
/* _ha: keys of the nodes already placed in the current group. */
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

/* "if 0 then set" never reads a row; it only defines FROM and TO in the
   program data vector so the hash below can use them. */
if 0 then set full;
/* from_to: adjacency lookup, several TO values per FROM (multidata). */
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
/* no: the nodes not yet assigned to a group, walked with iterator hi_no. */
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

/* Each node still present in NO starts a new group. */
do while(hi_no.next()=0);
 /* New group number; write the starting node. */
 household+1; output;
 count=1;
 /* Mark the start node as seen and queue it for expansion. */
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 /* Expand queued nodes in insertion order until the queue is exhausted. */
 do while(rc=0);
   from=last;rx=from_to.find();
   /* Visit every neighbour of the node being expanded. */
   do while(rx=0);
     key=to;ry=_ha.check();
      /* Neighbour not seen yet: write it, take it out of NO so it cannot
         start a group of its own, mark it seen and queue it. */
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
/* Reset the queue and the seen-set before the next group. */
ha.clear();_ha.clear();
end;
/* All work happens in the loops above; end the step after one pass. */
stop;
run;

/* Attach the HOUSEHOLD number from WANT to every application in X. The
   identifier columns are tried in order and the first one found in WANT
   supplies the value; HOUSEHOLD stays missing when none is found. */
data final_want(drop=k node);
  if _n_ = 1 then do;
    /* defines NODE and HOUSEHOLD without reading a row */
    if 0 then set want;
    declare hash h(dataset:'want');
    h.definekey('node');
    h.definedata('household');
    h.definedone();
  end;
  set x;
  array ids{*} $ Email_ID  IP_ID Address_ID phone_ID;
  do k = 1 to dim(ids);
    node = ids{k};
    call missing(household);
    if h.find() = 0 then leave;
  end;
run;

proc print data=final_want noobs;
run;

&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 Mar 2017 10:31:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341919#M272788</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-17T10:31:26Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341921#M272789</link>
      <description>&lt;PRE&gt;
The last data step could be as simple as the following.




/* Attach the HOUSEHOLD number from WANT to every application in X, looking
   the group up through the e-mail value only. */
data final_want(drop=node);
  if _n_ = 1 then do;
    /* defines NODE and HOUSEHOLD without reading a row */
    if 0 then set want;
    declare hash h(dataset:'want');
    h.definekey('node');
    h.definedata('household');
    h.definedone();
  end;
  set x;
  node = email_id;
  h.find();
run;

proc print data=final_want noobs;
run;

&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 Mar 2017 10:37:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/341921#M272789</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-17T10:37:55Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342056#M272790</link>
      <description>&lt;P&gt;The procedure I gave you does find the correct answer. Unfortunately the forum parser garbled my macro code. Please try again with the attached version.&lt;/P&gt;</description>
      <pubDate>Fri, 17 Mar 2017 17:21:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342056#M272790</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-03-17T17:21:23Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342453#M272791</link>
      <description>&lt;P&gt;Thanks for your reply!&lt;/P&gt;
&lt;P&gt;your code is very nice! but to further extend a bit, if the data contain missing values like one or all the columns are blank for some applicaitons, how do you handle this problem?&lt;/P&gt;
&lt;P&gt;I modified your code a little like below:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="3"&gt;&lt;STRONG&gt;data&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; have;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;set&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; n;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;array&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; x{*} $ Customer_Contact_Number name_dob_cluster1 driver_cluster1 address_75_cluster1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;if&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; driver_cluster1=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;'1'&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;then&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt; driver_cluster1=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;''&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;length&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; from to $ &lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;100&lt;/FONT&gt;&lt;/STRONG&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;do&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; i=&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;1&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;to&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; dim(x)-&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;1&lt;/FONT&gt;&lt;/STRONG&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;from=x{i};&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;do&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; j=i+&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;1&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;to&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; dim(x);&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;to=x{j};&lt;/P&gt;
&lt;P&gt;　&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;if&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; from^=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;''&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;or&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; to^=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;''&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;then&lt;/FONT&gt;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;output&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;end&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;end&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;keep&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; from to;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="3"&gt;&lt;STRONG&gt;run&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="3"&gt;data&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; have1;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;set&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; have;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;if&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; from=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;''&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;then&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;do&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt; from1+&lt;/FONT&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;1&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt; from=cats(&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;'a'&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;,from1);&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;end&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;if&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt; to=&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;''&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="3"&gt;then&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;do&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt; to1+&lt;/FONT&gt;&lt;FONT color="#008080" face="Courier New" size="3"&gt;1&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt; to=cats(&lt;/FONT&gt;&lt;FONT color="#800080" face="Courier New" size="3"&gt;'b'&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;,to1);&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="3"&gt;end&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="3"&gt;run&lt;/FONT&gt;&lt;FONT face="Courier New" size="3"&gt;;&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt;so if all of the columns are blank then it should be removed. if one or some of the columns are blank then the blank columns will be assigned a ID like a1, a2 a3 (all the blank column must have a distinct id)&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="Courier New" size="3"&gt;what do you think?&lt;/FONT&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 03:40:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342453#M272791</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-20T03:40:32Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342466#M272792</link>
      <description>&lt;PRE&gt;
You are on the right way.


/* Edge list from X: one (from, to) row for each pair of identifier values
   on a row, skipping any pair in which either value is missing. */
data have(keep=from to);
  set x;
  array ids{*} $ Email_ID  IP_ID Address_ID phone_ID;
  length from to $ 100;
  do a = 1 to dim(ids) - 1;
    from = ids{a};
    do b = a + 1 to dim(ids);
      to = ids{b};
      if not (missing(ids{a}) or missing(ids{b})) then output;
    end;
  end;
run;



"if one or some of the columns are blank then the blank columns will be assigned a ID like a1, a2 a3 "
I don't understand what you mean. Post an example to describe your question.


&lt;/PRE&gt;</description>
      <pubDate>Mon, 20 Mar 2017 05:38:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342466#M272792</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-20T05:38:57Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342471#M272793</link>
      <description>&lt;P&gt;I realised using the following statment won't be able to find the correct result because It will ignore both from and to value if one of them is missing.&lt;/P&gt;
&lt;P&gt;for example:&lt;/P&gt;
&lt;P&gt;from to&lt;/P&gt;
&lt;P&gt;1 &amp;nbsp; &amp;nbsp; &amp;nbsp;missing&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;then that record will be ignored. However it should be included as from=1 might connect to some applications&lt;/P&gt;
&lt;PRE&gt; if not missing(x{i}) and not missing(x{j}) then output;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;Also I think something need to be done at X table by giving a distinct number to all the missing value (so in other words, treat the missing value as distinct ID) so as to get the application number at the last step.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;anything wrong please let me know thank!&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 05:59:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342471#M272793</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-20T05:59:45Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342474#M272794</link>
      <description>&lt;P&gt;OK. I know what you mean now.&lt;/P&gt;
&lt;P&gt;The best way is filling these missing value as unique value just as you said before.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The following code could give you what you want.&lt;/P&gt;
&lt;P&gt;CODE NOT TESTED.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;data have;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;set have;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;array x{*} $ _character_;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;do i=1 to dim(x);&lt;/P&gt;
&lt;P&gt;&amp;nbsp; &amp;nbsp;if missing(x{i}) then do; n+1; x{i}=cats('a',n);end;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;end;&lt;/P&gt;
&lt;P&gt;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;And make sure your character variable's length is big enough to hold 'a9999999' .&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 06:15:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342474#M272794</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-20T06:15:43Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342478#M272795</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Cool, I have done something similar, but it takes ages to run for about 1.3 million applications. Maybe I need to find a way to make the processing more efficient. Thanks a lot!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 06:37:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342478#M272795</guid>
      <dc:creator>gyambqt</dc:creator>
      <dc:date>2017-03-20T06:37:23Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342516#M272796</link>
      <description>&lt;P&gt;Actually you do not have to transform all the missing into axxxxx .&lt;/P&gt;
&lt;P&gt;The only problem exist in this scenario is there is only one non-missing value in obs.like:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;id &amp;nbsp;address phone email ..........&lt;/P&gt;
&lt;P&gt;1 &amp;nbsp; &amp;nbsp;xx &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; .......rest are all missing .&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So you can reduce the number of node like:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;/* Build the edge list (from, to) between the identifier values found on
   each row of X. */
data have;
 set x;
 array x{*} $ Email_ID  IP_ID Address_ID phone_ID;
 length from to $ 100;

 /* Every identifier is missing: the row contributes no node, drop it. */
 if cmiss(of x{*})=dim(x) then delete;
 /* Exactly one identifier is present (all but one are missing): no pair
    can be formed, so link the lone value to a generated unique id to keep
    it in the edge list. */
 else if cmiss(of x{*})=dim(x)-1 then do;
  from=coalescec(of x{*});to=uuidgen(0);output;
 end;
 /* Two or more identifiers present: write every pair whose two values are
    both non-missing. */
 else do;
  do i=1 to dim(x)-1;
   from=x{i};
   do j=i+1 to dim(x);
    to=x{j};
    if not missing(from) and not missing(to) then output;
   end;
  end;
 end;

 keep from to;
run;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 10:07:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342516#M272796</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-20T10:07:21Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342539#M272797</link>
      <description>&lt;P&gt;In the last data step, you could add one more option in hash table to fast it.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;declare hash h(dataset:'want', hashexp: 20);&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 20 Mar 2017 11:53:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/342539#M272797</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-20T11:53:43Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/346176#M272798</link>
      <description>&lt;P&gt;Final version code.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;

/* Sample input: one application per row with its e-mail, IP, address and phone identifiers. */
data x;
  input Application_ID
        (Email_ID IP_ID Address_ID phone_ID) (:$20.);
  datalines;
1   email1   ip1   address1   phone1
2   email2   ip2   address1   phone2
3   email3   ip2   address2   phone5
4   email5   ip1   address3   phone13
5   email1   ip13  address13  phone13
11  email21  ip21  address21  phone21
12  email22  ip21  address22  phone22
13  email22  ip22  address23  phone23
;
run;

/* Build the edge list (from, to) linking the identifiers found on each row of X. */
data have;
 set x;
 array x{*} $ Email_ID  IP_ID Address_ID phone_ID;
 length from to $ 100;

 /* Every identifier is missing: there is nothing to link. */
 if cmiss(of x{*})=dim(x) then delete;
 /* Exactly one identifier is present: no pair can be formed, so tie the lone
    value to a generated UUID to keep it in the edge list. */
 else if cmiss(of x{*})=dim(x)-1 then do;
     from=coalescec(of x{*});to=uuidgen(0);output;
 end;
 /* Two or more identifiers: emit each pair whose members are both non-missing. */
 else do;
          do i=1 to dim(x)-1;
           from=x{i};
            do j=i+1 to dim(x);
             to=x{j};
             if not missing(from) and not missing(to) then output;
            end;
          end;
 end;

 keep from to;
run;




/* Write every edge of HAVE in both directions to FULL and collect the distinct
   endpoints in a hash object that is saved as dataset NODE on the last row. */
data full;
  set have end=last;

  if _n_ = 1 then do;
    declare hash seen();
    seen.definekey('node');
    seen.definedata('node');
    seen.definedone();
  end;

  /* Edge as read, then register its starting point. */
  output;
  node = from;
  seen.replace();

  /* Swap the endpoints, write the reversed edge, then register the other endpoint. */
  from = to;
  to = node;
  output;
  node = from;
  seen.replace();

  if last then seen.output(dataset: 'node');
  drop node;
run;


/* Assign a household number to every node: starting from each node still held in
   hash NO, follow the edges loaded from FULL and write each node reached with the
   same household value. Reads datasets FULL and NODE; writes WANT(node, household). */
data want(keep=node household);
/* ha: list of nodes waiting to be expanded, keyed by the running counter COUNT
   (ascending order) and holding the node name in LAST; hi iterates over it. */
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
/* _ha: key-only hash recording the nodes already reached in the current household. */
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

/* Never executed at run time; it only defines FROM and TO in the program data vector. */
if 0 then set full;
/* from_to: adjacency lookup, several TO values may be stored per FROM (multidata). */
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

/* Never executed at run time; it only defines NODE in the program data vector. */
if 0 then set node;
/* no: the nodes loaded from dataset NODE; hi_no walks through them. */
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

/* Each node delivered by hi_no opens a new household. */
do while(hi_no.next()=0);
 household+1; output;
 count=1;
 /* Mark the starting node as reached and queue it for expansion. */
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 /* Expand queued nodes in COUNT order until the iterator is exhausted. */
 do while(rc=0);
   from=last;rx=from_to.find();
   /* Visit every TO stored under the current FROM. */
   do while(rx=0);
     key=to;ry=_ha.check();
      /* A non-zero return code from check() means TO has not been reached yet. */
      if ry ne 0 then do;
       /* Write the neighbour under the current household and remove it from NO. */
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       /* Append the neighbour to the queue under the next counter value. */
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
/* Empty the queue and the reached-set before the next household starts. */
ha.clear();_ha.clear();
end;
/* No SET statement is executed in this step, so end it explicitly after one pass. */
stop;
run;

/* Add the household number from WANT to every row of X, using the first
   identifier on the row that is found in the lookup table. */
data final_want;
  if _n_ = 1 then do;
    if 0 then set want;
    declare hash lookup(dataset: 'want');
    lookup.definekey('node');
    lookup.definedata('household');
    lookup.definedone();
  end;

  set x;
  array ids{*} $ Email_ID IP_ID Address_ID phone_ID;

  /* Start from a missing household so a row without a match does not keep the previous value. */
  call missing(household);
  do i = 1 to dim(ids);
    node = ids{i};
    if lookup.find() = 0 then leave;
  end;

  drop i node;
run;
proc print noobs;run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 31 Mar 2017 15:16:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/346176#M272798</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-31T15:16:28Z</dc:date>
    </item>
    <item>
      <title>Re: How to find link between nodes</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/346272#M272799</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;SAS/WPS/R: How to find links between nodes

link to this message
https://goo.gl/KgN8Xw
https://communities.sas.com/t5/General-SAS-Programming/How-to-find-link-between-nodes/td-p/341819

HAVE
====

Up to 40 obs SD1.HAVE total obs=8

Obs                    STR

 1     za1 &amp;gt; email1 &amp;gt; ip1 &amp;gt; address1 &amp;gt; phone1       &amp;gt;&amp;gt; first cluster
 2     za2 &amp;gt; email2 &amp;gt; ip2 &amp;gt; address1 &amp;gt; phone2
 3     za3 &amp;gt; email3 &amp;gt; ip2 &amp;gt; address2 &amp;gt; phone5
 4     za4 &amp;gt; email5 &amp;gt; ip1 &amp;gt; address3 &amp;gt; phone13
 5     za5 &amp;gt; email1 &amp;gt; ip13 &amp;gt; address13 &amp;gt; phone13

 6     za11 &amp;gt; email21 &amp;gt; ip21 &amp;gt; address21 &amp;gt; phone21  &amp;gt;&amp;gt; second cluster
 7     za12 &amp;gt; email22 &amp;gt; ip21 &amp;gt; address22 &amp;gt; phone22
 8     za13 &amp;gt; email22 &amp;gt; ip22 &amp;gt; address23 &amp;gt; phone23

 6 and 7 are connected by ip21
 7 and 8 are connected by email22


WANT ( There are two non-connected clusters )
==============================================

CLUSTER                    UNIQUE CONNECTED CLUSTERS (ie ip2 only occurs once)

 1     za1&amp;gt;email1&amp;gt;ip1&amp;gt;address1&amp;gt;phone1&amp;gt;za2&amp;gt;email2&amp;gt;ip2&amp;gt;phone2&amp;gt;za3&amp;gt;email3&amp;gt;
       address2&amp;gt;phone5&amp;gt;za4&amp;gt;email5&amp;gt;address3&amp;gt;phone13&amp;gt;za5&amp;gt;ip13&amp;gt;address13

 2     za11&amp;gt;email21&amp;gt;ip21&amp;gt;address21&amp;gt;phone21&amp;gt;za12&amp;gt;email22&amp;gt;address22&amp;gt;phone22&amp;gt;za13&amp;gt;ip22&amp;gt;address23&amp;gt;phone23

WORKING CODE
============

   R - all other code is prep for input and output

        cl &amp;lt;- clusters(graph.data.frame(combspl))$membership[-(1:length(spl))];

    igraph package is heavily used and debugged (not true of all R packages)

FULL SOLUTION

*                _                  _       _
 _ __ ___   __ _| | _____        __| | __ _| |_ __ _
| '_ ` _ \ / _` | |/ / _ \_____ / _` |/ _` | __/ _` |
| | | | | | (_| |   &amp;lt;  __/_____| (_| | (_| | || (_| |
|_| |_| |_|\__,_|_|\_\___|      \__,_|\__,_|\__\__,_|

;

/* Create SD1.HAVE with a single variable STR per input row: the character
   identifiers read from the data lines joined by a delimiter with CATX. */
options validvarname=upcase;
libname sd1 "d:/sd1";
data sd1.have(keep=str);
input (Application_ID Email_ID IP_ID Address_ID phone_ID) ( :$20.);
/* NOTE(review): array chr is not referenced again in this step. */
array chr _character_;
str=catx('&amp;gt;',of _character_);
cards4;
za1 email1 ip1 address1 phone1
za2 email2 ip2 address1 phone2
za3 email3 ip2 address2 phone5
za4 email5 ip1 address3 phone13
za5 email1 ip13 address13 phone13
za11 email21 ip21 address21 phone21
za12 email22 ip21 address22 phone22
za13 email22 ip22 address23 phone23
;;;;
run;quit;


/* Pass a quoted program to the utl_submit_wps64 macro (defined elsewhere;
   presumably it submits the text to WPS, confirm against the macro source).
   The quoted program runs PROC R: the R code reads have.sas7bdat, splits each STR
   on the delimiter, builds a (grp, val) data frame, takes the igraph cluster
   membership of the values, and pastes the members of each cluster into one
   string per cluster. Vector a is then imported as WRK.LINKAGES, where WRK
   points at the path of the WORK library. */
%utl_submit_wps64('
libname sd1 "d:/sd1";
options set=R_HOME "C:/Program Files/R/R-3.3.2";
libname wrk "%sysfunc(pathname(work))";
proc r;
submit;
source("c:/Program Files/R/R-3.3.2/etc/Rprofile.site",echo=T);
library(igraph);
library(haven);
data &amp;lt;-read_sas("d:/sd1/have.sas7bdat");
data&amp;lt;-as.character(data$STR);
spl &amp;lt;- strsplit(data,"&amp;gt;");
combspl &amp;lt;- data.frame(
  grp = rep(seq_along(spl),lengths(spl)),
  val = unlist(spl)
);
cl &amp;lt;- clusters(graph.data.frame(combspl))$membership[-(1:length(spl))];
dat &amp;lt;- data.frame(cl);
dat[,2] &amp;lt;- row.names(dat);
a &amp;lt;- character(0);
for (i in 1:max(cl)) {
  a[i] &amp;lt;- paste(paste0(dat[(dat[,1] == i),][,2]), collapse="&amp;gt;");
};
endsubmit;
import r=a data=wrk.linkages;
run;quit;
');

/* Print the LINKAGES dataset that the submitted program wrote into the WORK path. */
proc print data=linkages width=min;
run;quit;

Up to 40 obs from linkages total obs=2

CLUSTERS

 1     za1 &amp;gt; email1 &amp;gt; ip1 &amp;gt; address1 &amp;gt; phone1 &amp;gt; za2 &amp;gt; email2 &amp;gt; ip2 &amp;gt; phone2 &amp;gt;
       za3 &amp;gt; email3 &amp;gt; address2 &amp;gt; phone5 &amp;gt; za4 &amp;gt; email5 &amp;gt; address3 &amp;gt; phone13 &amp;gt;
       za5 &amp;gt; ip13 &amp;gt; address13

 2     za11 &amp;gt; email21 &amp;gt; ip21 &amp;gt; address21 &amp;gt; phone21 &amp;gt; za12 &amp;gt; email22 &amp;gt;
       address22 &amp;gt; phone22 &amp;gt; za13 &amp;gt; ip22 &amp;gt; address23 &amp;gt; phone23



&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 31 Mar 2017 18:26:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-find-link-between-nodes/m-p/346272#M272799</guid>
      <dc:creator>rogerjdeangelis</dc:creator>
      <dc:date>2017-03-31T18:26:24Z</dc:date>
    </item>
  </channel>
</rss>

