<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Find all linked ids in a list of id pairs (with multiple links) in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514291#M138677</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'd like to come up with a more efficient way of searching a fairly large list of id pairs (let's say 200k+ id pairs) and creating chains of ids from them.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Example (12 pairs of linked ids, 3 chains of ids in total):&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data have;&lt;/P&gt;&lt;P&gt;&amp;nbsp; length ids $7;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input ids $;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;datalines;&lt;/P&gt;&lt;P&gt;123-456&lt;/P&gt;&lt;P&gt;111-222&lt;/P&gt;&lt;P&gt;111-333&lt;/P&gt;&lt;P&gt;222-333&lt;/P&gt;&lt;P&gt;333-444&lt;/P&gt;&lt;P&gt;333-555&lt;/P&gt;&lt;P&gt;666-777&lt;/P&gt;&lt;P&gt;444-888&lt;/P&gt;&lt;P&gt;444-555&lt;/P&gt;&lt;P&gt;888-777&lt;/P&gt;&lt;P&gt;123-987&lt;/P&gt;&lt;P&gt;654-321&lt;/P&gt;&lt;P&gt;;&lt;/P&gt;&lt;P&gt;run;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The result I want is:&lt;/P&gt;&lt;P&gt;123-456-987&lt;/P&gt;&lt;P&gt;111-222-333-444-555-888-777-666&lt;/P&gt;&lt;P&gt;654-321&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The output order is not important (e.g. 456-987-123 would still be fine in the first chain). In the real data each id is 14 characters long.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a macro which gives me this result (below) but to process 19k pairs of linked ids takes around 20 minutes which I'd really like to improve (ideally it would process 200k+ pairs in around 20 minutes).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can anyone help me improve the efficiency of the logic I have, or, describe an alternative approach?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I found this link -&amp;nbsp;&lt;A href="http://support.sas.com/kb/26/160.html" target="_blank"&gt;http://support.sas.com/kb/26/160.html&lt;/A&gt; - which does something similar using hash tables (and runs in seconds with a test dataset of 100k+ id pairs) but if I break my 'have' dataset down into id and new_id and run it through that code it doesn't find all the links and produces duplicate chains.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This is my current code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%macro chain();&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* count the number of links yet to become part of a chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; proc sql noprint; select count(*) into :link_count from work.have; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; %do %until (%eval(&amp;amp;link_count = 0));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* create a one row table containing each id pair */&lt;/P&gt;&lt;P&gt;&amp;nbsp; proc transpose data=work.have out=work.have_t (drop=_name_) prefix = lp_;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; var ids;&lt;/P&gt;&lt;P&gt;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* do the linking */&lt;/P&gt;&lt;P&gt;&amp;nbsp; data work.links (keep=all_linked_ids);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; set work.have_t;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; attrib all_linked_ids = $50;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; array linked_ids[*] lp_: _temporary_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; do link_being_checked = 1 to 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; all_linked_ids = linked_ids[link_being_checked];&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; do check_pstn = 1 to dim(linked_ids);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* if the id on the left of the dash matches an id already in the chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* and the id on the right of the dash hasn't already been added to &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* the chain, add it to the chain and reset the check_pstn var so &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* every id in the chain is always tested vs every available id. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if findw(all_linked_ids, substr(linked_ids[check_pstn], 1, 3) and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn ^= link_being_checked and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; findw(all_linked_ids, substr(linked_ids[check_pstn], 5, 3)) &amp;lt; 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; then do; all_linked_ids = catx('-', all_linked_ids, substr(linked_ids[check_pstn], 5, 3));&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn = 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; else&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* if the id on the right of the dash matches an id already in the chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* and the id on the left of the dash hasn't already been added to &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* the chain, add it to the chain and reset the check_pstn var so &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* every id in the chain is always tested vs every available id. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if findw(all_linked_ids, substr(linked_ids[check_pstn], 5, 3) and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn ^= link_being_checked and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; findw(all_linked_ids, substr(linked_ids[check_pstn], 1, 3)) &amp;lt; 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; then do; all_linked_ids = catx('-', all_linked_ids, substr(linked_ids[check_pstn], 1, 3));&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn = 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* when the end of the ids in the array is reached, output a record of this id chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* separate the chained ids out into individual ids */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; data work.links_1 (keep=id);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; set work.links;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; length id $3;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; do j = 1 to (length(compress(all_linked_ids))+1) / 4 until(p eq 0);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; call scan(all_linked_ids, j, p, l);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; id = substrn(all_linked_ids, p, l);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* create a list of the ids in this chain so it can be removed from the main list of ids */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; select "'"||id||"'" into :cid_list separated by ' ' from work.links_1; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* delete the ids from the main list */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; delete from work.have where substr(ids, 1, 3) in (&amp;amp;cid_list.) or substr(ids, 5, 3) in (&amp;amp;cid_list.); quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* maintain a list of all the link chains created so far */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc append base=work.all_links&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; data=work.links;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* count the number of links yet to be tested to become part of a chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; select count(*) into :link_count from work.have; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; %end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%mend chain;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%chain;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm working in SAS 9.4. Many thanks in advance for any advice or suggestions.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Cy&lt;/P&gt;</description>
    <pubDate>Sun, 18 Nov 2018 20:46:33 GMT</pubDate>
    <dc:creator>CyFerguson</dc:creator>
    <dc:date>2018-11-18T20:46:33Z</dc:date>
    <item>
      <title>Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514291#M138677</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'd like to come up with a more efficient way of searching a fairly large list of id pairs (let's say 200k+ id pairs) and creating chains of ids from them.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Example (12 pairs of linked ids, 3 chains of ids in total):&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data have;&lt;/P&gt;&lt;P&gt;&amp;nbsp; length ids $7;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input ids $;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;datalines;&lt;/P&gt;&lt;P&gt;123-456&lt;/P&gt;&lt;P&gt;111-222&lt;/P&gt;&lt;P&gt;111-333&lt;/P&gt;&lt;P&gt;222-333&lt;/P&gt;&lt;P&gt;333-444&lt;/P&gt;&lt;P&gt;333-555&lt;/P&gt;&lt;P&gt;666-777&lt;/P&gt;&lt;P&gt;444-888&lt;/P&gt;&lt;P&gt;444-555&lt;/P&gt;&lt;P&gt;888-777&lt;/P&gt;&lt;P&gt;123-987&lt;/P&gt;&lt;P&gt;654-321&lt;/P&gt;&lt;P&gt;;&lt;/P&gt;&lt;P&gt;run;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The result I want is:&lt;/P&gt;&lt;P&gt;123-456-987&lt;/P&gt;&lt;P&gt;111-222-333-444-555-888-777-666&lt;/P&gt;&lt;P&gt;654-321&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The output order is not important (e.g. 456-987-123 would still be fine in the first chain). In the real data each id is 14 characters long.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a macro which gives me this result (below) but to process 19k pairs of linked ids takes around 20 minutes which I'd really like to improve (ideally it would process 200k+ pairs in around 20 minutes).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Can anyone help me improve the efficiency of the logic I have, or, describe an alternative approach?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I found this link -&amp;nbsp;&lt;A href="http://support.sas.com/kb/26/160.html" target="_blank"&gt;http://support.sas.com/kb/26/160.html&lt;/A&gt; - which does something similar using hash tables (and runs in seconds with a test dataset of 100k+ id pairs) but if I break my 'have' dataset down into id and new_id and run it through that code it doesn't find all the links and produces duplicate chains.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This is my current code:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%macro chain();&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* count the number of links yet to become part of a chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; proc sql noprint; select count(*) into :link_count from work.have; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; %do %until (%eval(&amp;amp;link_count = 0));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* create a one row table containing each id pair */&lt;/P&gt;&lt;P&gt;&amp;nbsp; proc transpose data=work.have out=work.have_t (drop=_name_) prefix = lp_;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; var ids;&lt;/P&gt;&lt;P&gt;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* do the linking */&lt;/P&gt;&lt;P&gt;&amp;nbsp; data work.links (keep=all_linked_ids);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; set work.have_t;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; attrib all_linked_ids = $50;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; array linked_ids[*] lp_: _temporary_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; do link_being_checked = 1 to 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; all_linked_ids = linked_ids[link_being_checked];&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; do check_pstn = 1 to dim(linked_ids);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* if the id on the left of the dash matches an id already in the chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* and the id on the right of the dash hasn't already been added to &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* the chain, add it to the chain and reset the check_pstn var so &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* every id in the chain is always tested vs every available id. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if findw(all_linked_ids, substr(linked_ids[check_pstn], 1, 3) and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn ^= link_being_checked and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; findw(all_linked_ids, substr(linked_ids[check_pstn], 5, 3)) &amp;lt; 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; then do; all_linked_ids = catx('-', all_linked_ids, substr(linked_ids[check_pstn], 5, 3));&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn = 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; else&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* if the id on the right of the dash matches an id already in the chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* and the id on the left of the dash hasn't already been added to &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* the chain, add it to the chain and reset the check_pstn var so &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; /* every id in the chain is always tested vs every available id. &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; if findw(all_linked_ids, substr(linked_ids[check_pstn], 5, 3) and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn ^= link_being_checked and&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; findw(all_linked_ids, substr(linked_ids[check_pstn], 1, 3)) &amp;lt; 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; then do; all_linked_ids = catx('-', all_linked_ids, substr(linked_ids[check_pstn], 1, 3));&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; check_pstn = 1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* when the end of the ids in the array is reached, output a record of this id chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* separate the chained ids out into individual ids */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; data work.links_1 (keep=id);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; set work.links;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; length id $3;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; do j = 1 to (length(compress(all_linked_ids))+1) / 4 until(p eq 0);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; call scan(all_linked_ids, j, p, l);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; id = substrn(all_linked_ids, p, l);&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* create a list of the ids in this chain so it can be removed from the main list of ids */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; select "'"||id||"'" into :cid_list separated by ' ' from work.links_1; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* delete the ids from the main list */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; delete from work.have where substr(ids, 1, 3) in (&amp;amp;cid_list.) or substr(ids, 5, 3) in (&amp;amp;cid_list.); quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* maintain a list of all the link chains created so far */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc append base=work.all_links&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp; data=work.links;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; /* count the number of links yet to be tested to become part of a chain */&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; proc sql noprint; select count(*) into :link_count from work.have; quit;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; %end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%mend chain;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;%chain;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm working in SAS 9.4. Many thanks in advance for any advice or suggestions.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Cy&lt;/P&gt;</description>
      <pubDate>Sun, 18 Nov 2018 20:46:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514291#M138677</guid>
      <dc:creator>CyFerguson</dc:creator>
      <dc:date>2018-11-18T20:46:33Z</dc:date>
    </item>
    <item>
      <title>Re: Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514327#M138688</link>
      <description>Try this: &lt;BR /&gt;&lt;A href="https://communities.sas.com/t5/SAS-Communities-Library/SAS-macro-for-finding-all-paths-in-a-directed-graph-network/ta-p/221568" target="_blank"&gt;https://communities.sas.com/t5/SAS-Communities-Library/SAS-macro-for-finding-all-paths-in-a-directed-graph-network/ta-p/221568&lt;/A&gt;</description>
      <pubDate>Mon, 19 Nov 2018 03:12:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514327#M138688</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-11-19T03:12:53Z</dc:date>
    </item>
    <item>
      <title>Re: Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514336#M138693</link>
      <description>&lt;P&gt;What you describe amounts to finding the connected components in&amp;nbsp;an undirected graph. Proc optnet (part of SAS/OR) has a routine for that:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines dlm="-";
  length id1 id2 $7;
  input (id1 id2) (:$7.);
datalines;
123-456
111-222
111-333
222-333
333-444
333-555
666-777
444-888
444-555
888-777
123-987
654-321
;

proc optnet data_links=have graph_direction=undirected out_nodes=groups;
data_links_var from=id1 to=id2;
concomp;
run;

proc sort data=groups; by concomp node; run;

data chains;
length chain $100;
do until(last.concomp);
    set groups; by concomp;
    chain = catx("-", chain, node);
    end;
keep concomp chain;
rename concomp=chain_ID;
run;

proc print data=chains noobs; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;PRE&gt;                    chain                              chain_ID

                    123-456-987                            1
                    111-222-333-444-555-666-777-888        2
                    321-654                                3
&lt;/PRE&gt;</description>
      <pubDate>Mon, 19 Nov 2018 04:32:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514336#M138693</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2018-11-19T04:32:55Z</dc:date>
    </item>
    <item>
      <title>Re: Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514403#M138725</link>
      <description>&lt;P&gt;If you don't have SAS/OR .Try this one .&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines dlm="-";
  length from to $7;
  input (from to) (:$7.);
datalines;
123-456
111-222
111-333
222-333
333-444
333-555
666-777
444-888
444-555
888-777
123-987
654-321
;



data full;
  set have end=last;
  if _n_ eq 1 then do;
   declare hash h();
    h.definekey('node');
     h.definedata('node');
     h.definedone();
  end;
  output;
  node=from; h.replace();
  from=to; to=node;
  output;
  node=from; h.replace();
  if last then h.output(dataset:'node');
  drop node;
run;


data want(keep=node household);
declare hash ha(ordered:'a');
declare hiter hi('ha');
ha.definekey('count');
ha.definedata('last');
ha.definedone();
declare hash _ha(hashexp: 20);
_ha.definekey('key');
_ha.definedone();

if 0 then set full;
declare hash from_to(dataset:'full(where=(from is not missing and to is not missing))',hashexp:20,multidata:'y');
 from_to.definekey('from');
 from_to.definedata('to');
 from_to.definedone();

if 0 then set node;
declare hash no(dataset:'node');
declare hiter hi_no('no');
 no.definekey('node');
 no.definedata('node');
 no.definedone();
 

do while(hi_no.next()=0);
 household+1; output;
 count=1;
 key=node;_ha.add();
 last=node;ha.add();
 rc=hi.first();
 do while(rc=0);
   from=last;rx=from_to.find();
   do while(rx=0);
     key=to;ry=_ha.check();
      if ry ne 0 then do;
       node=to;output;rr=no.remove(key:node);
       key=to;_ha.add();
       count+1;
       last=to;ha.add();
      end;
      rx=from_to.find_next();
   end;
   rc=hi.next();
end;
ha.clear();_ha.clear();
end;
stop;
run;



data final_want;
length chain $ 800;
do until(last.household);
    set want; 
    by household;
    chain = catx("-", chain, node);
    end;
keep household chain;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 19 Nov 2018 12:01:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514403#M138725</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2018-11-19T12:01:55Z</dc:date>
    </item>
    <item>
      <title>Re: Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514611#M138767</link>
      <description>&lt;DIV&gt;PGStats - thanks, unfortunately we don't have the SAS/OR package at my site - pity as it looks like such a good solution!&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Reeza and Ksharp, thanks for your replies, they both look encouraging - I will test them on Thursday when I'm back on site and provide an update.&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Cy&lt;/DIV&gt;</description>
      <pubDate>Mon, 19 Nov 2018 22:25:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514611#M138767</guid>
      <dc:creator>CyFerguson</dc:creator>
      <dc:date>2018-11-19T22:25:13Z</dc:date>
    </item>
    <item>
      <title>Re: Find all linked ids in a list of id pairs (with multiple links)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514654#M138787</link>
      <description>&lt;P&gt;You can also make use of the subGraphs macro given &lt;A href="https://communities.sas.com/t5/SAS-Communities-Library/How-to-find-all-connected-components-in-a-graph/ta-p/231539" target="_self"&gt;here&lt;/A&gt;, this way&amp;nbsp;:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines dlm="-";
  length id1 id2 $7;
  input (id1 id2) (:$7.);
datalines;
123-456
111-222
111-333
222-333
333-444
333-555
666-777
444-888
444-555
888-777
123-987
654-321
;

%include "&amp;amp;sasforum\subGraphsMacro.sas";

%SubGraphs(have,from=id1,to=id2,out=groups);

proc sort data=groups; by clust node; run;

data chains;
length chain $100;
chain_ID + 1;
do until(last.clust);
    set groups; by clust;
    chain = catx("-", chain, node);
    end;
keep chain_ID chain;
run;

proc print data=chains noobs; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;PRE&gt;                    chain                              chain_ID

                    123-456-987                            1
                    111-222-333-444-555-666-777-888        2
                    321-654                                3
&lt;/PRE&gt;</description>
      <pubDate>Tue, 20 Nov 2018 03:29:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Find-all-linked-ids-in-a-list-of-id-pairs-with-multiple-links/m-p/514654#M138787</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2018-11-20T03:29:29Z</dc:date>
    </item>
  </channel>
</rss>

