<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: non-trivial algorithm for char fields in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598461#M172584</link>
    <description>&lt;P&gt;Values in the 'name' field are&amp;nbsp;looped chains.&lt;/P&gt;&lt;P&gt;If the sequence of the elements is different, we have different&amp;nbsp;chains.&lt;/P&gt;&lt;P&gt;Rows 3, 4 and 5 contain the same sequence of elements with a different starting point; the chain is the same, so they are duplicates.&lt;/P&gt;</description>
    <pubDate>Tue, 22 Oct 2019 17:25:14 GMT</pubDate>
    <dc:creator>Ivan555</dc:creator>
    <dc:date>2019-10-22T17:25:14Z</dc:date>
    <item>
      <title>non-trivial algorithm for char fields (arrays, sorting, etc..)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598429#M172574</link>
      <description>&lt;P&gt;Hello!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Performing one of the tasks, I got stuck and can not find the best way to solve the problem.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a table.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table tmp (num int, name char(200));
insert into tmp
	values (1,'281.3891.3891.281')
	values (2,'3891.281.281.3891')
	values (3,'1162.5645.5645.500835.500835.1162')
	values (4,'5645.500835.500835.1162.1162.5645')
	values (5,'500835.1162.1162.5645.5645.500835')
	values (6,'1349.1162.1162.5645.5645.500835.500835.1349')
	values (7,'1162.5645.5645.500835.500835.1349.1349.1162')
	values (8,'5645.500835.500835.1349.1349.1162.1162.5645')
	values (9,'500835.1349.1349.1162.1162.5645.5645.500835');
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Each line is a chain, and taking this into account, it is clear that the lines {1;2}, {3;4;5}, {6;7;8;9} are duplicates.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;The question is: what is the best way to filter the rows so that only one row remains inside each of the duplicate groups?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;(for example, the row with the lowest first number, like rows 1, 3, 7)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I was thinking of creating an array for each row and sorting the values by a common shift within each array. But I'm not sure if this is the smartest way.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;THX!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Oct 2019 09:49:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598429#M172574</guid>
      <dc:creator>Ivan555</dc:creator>
      <dc:date>2019-10-24T09:49:55Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598444#M172578</link>
      <description>&lt;P&gt;I am not exactly sure that I understand exactly what you want but see if this gets close.&lt;/P&gt;
&lt;PRE&gt;proc sql;
create table tmp (num int, name char(200));
insert into tmp
	values (1,'281.3891.3891.281')
	values (2,'3891.281.281.3891')
	values (3,'1162.5645.5645.500835.500835.1162')
	values (4,'5645.500835.500835.1162.1162.5645')
	values (5,'500835.1162.1162.5645.5645.500835')
	values (6,'1349.1162.1162.5645.5645.500835.500835.1349')
	values (7,'1162.5645.5645.500835.500835.1349.1349.1162')
	values (8,'5645.500835.500835.1349.1349.1162.1162.5645')
	values (9,'500835.1349.1349.1162.1162.5645.5645.500835');
quit;

data tmp2;
   set tmp;
   array v (50) $ 6;
   length newname $200;
   do i= 1 to countw(name);
      v[i]= scan(name,i,'.');
   end;
   call sortc(of v(*));
   newname=catx('.',of v(*));
   drop v: i;
run;

proc sort data=tmp2 out=want nodupkey;
  by newname;
run;
   &lt;/PRE&gt;</description>
      <pubDate>Tue, 22 Oct 2019 15:57:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598444#M172578</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2019-10-22T15:57:33Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598447#M172580</link>
      <description>Does it have to be an exact match?&lt;BR /&gt;What if the 2nd row were missing one code, given that the codes are duplicated?&lt;BR /&gt;&lt;BR /&gt;i.e. &lt;BR /&gt;&lt;BR /&gt;if 2 was&lt;BR /&gt;3891.281.3891 would it still be a duplicate?</description>
      <pubDate>Tue, 22 Oct 2019 16:04:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598447#M172580</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2019-10-22T16:04:37Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598455#M172581</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you, I am checking&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;P&gt;&lt;SPAN&gt;Does it have to be an exact match?&lt;/SPAN&gt;&lt;/P&gt;&lt;/BLOCKQUOTE&gt;&lt;P&gt;Yes, exact match using a parallel shift (I can't find a better way to say it).&lt;/P&gt;&lt;DIV class="lia-quilt-row lia-quilt-row-forum-message-main"&gt;&lt;DIV class="lia-quilt-column lia-quilt-column-20 lia-quilt-column-right lia-quilt-column-main-right"&gt;&lt;DIV class="lia-quilt-column-alley lia-quilt-column-alley-right"&gt;&lt;DIV class="lia-message-body lia-component-body"&gt;&lt;BLOCKQUOTE&gt;&lt;DIV class="lia-message-body-content"&gt;3891.281.3891 would it still be a duplicate?&lt;/DIV&gt;&lt;/BLOCKQUOTE&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class="lia-quilt-row lia-quilt-row-forum-message-footer"&gt;&lt;DIV class="lia-quilt-column lia-quilt-column-04 lia-quilt-column-left lia-quilt-column-footer-left"&gt;&lt;DIV class="lia-quilt-column-alley lia-quilt-column-alley-left"&gt;&lt;DIV class="lia-message-notify lia-component-report-abuse"&gt;Value '3891.281.281.3891' and value '3891.281.3891.282' will not be duplicates.&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Perhaps the example in my first message was not very good: groups {3;4;5} and {10;11;12} are different.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table tmp (num int, name char(200));
insert into tmp
	values (1,'281.3891.3891.281')
	values (2,'3891.281.281.3891')
	values (3,'1162.5645.5645.500835.500835.1162')
	values (4,'5645.500835.500835.1162.1162.5645')
	values (5,'500835.1162.1162.5645.5645.500835')
	values (6,'1349.1162.1162.5645.5645.500835.500835.1349')
	values (7,'1162.5645.5645.500835.500835.1349.1349.1162')
	values (8,'5645.500835.500835.1349.1349.1162.1162.5645')
	values (9,'500835.1349.1349.1162.1162.5645.5645.500835')
	values (10,'5645.1162.1162.500835.500835.5645')
	values (11,'1162.500835.500835.5645.5645.1162')
	values (12,'500835.5645.5645.1162.1162.500835');
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;P.S. I am trying to do my best while writing, but it seems my English is rather unclear. I am very sorry for it.&lt;/P&gt;</description>
      <pubDate>Tue, 22 Oct 2019 16:42:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598455#M172581</guid>
      <dc:creator>Ivan555</dc:creator>
      <dc:date>2019-10-22T16:42:46Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598458#M172582</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;It works and it is close,&amp;nbsp;thank you!&lt;/P&gt;&lt;P&gt;Could you say whether it is possible to improve your code to handle my second example? Groups {3;4;5} and {10;11;12} should differ.&lt;/P&gt;</description>
      <pubDate>Tue, 22 Oct 2019 17:08:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598458#M172582</guid>
      <dc:creator>Ivan555</dc:creator>
      <dc:date>2019-10-22T17:08:03Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598459#M172583</link>
      <description>What makes them different?</description>
      <pubDate>Tue, 22 Oct 2019 17:10:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598459#M172583</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2019-10-22T17:10:53Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598461#M172584</link>
      <description>&lt;P&gt;Values in the 'name' field are&amp;nbsp;looped chains.&lt;/P&gt;&lt;P&gt;If the sequence of the elements is different, we have different&amp;nbsp;chains.&lt;/P&gt;&lt;P&gt;Rows 3, 4 and 5 contain the same sequence of elements with a different starting point; the chain is the same, so they are duplicates.&lt;/P&gt;</description>
      <pubDate>Tue, 22 Oct 2019 17:25:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598461#M172584</guid>
      <dc:creator>Ivan555</dc:creator>
      <dc:date>2019-10-22T17:25:14Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598464#M172585</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13879"&gt;@Reeza&lt;/a&gt;: I take it that &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/272354"&gt;@Ivan555&lt;/a&gt;&amp;nbsp;regards two strings (in variable NAME) as "duplicates" if they are &lt;EM&gt;cyclic permutations&lt;/EM&gt; of each other, where the items to be permuted are the "words" (numbers) separated by periods. That is, the set of potential duplicates of &lt;FONT face="courier new,courier"&gt;'B.A.C.D'&lt;/FONT&gt; would be&lt;/P&gt;
&lt;PRE&gt;{'A.C.D.B', 'B.A.C.D', 'C.D.B.A', 'D.B.A.C'}&lt;/PRE&gt;</description>
      <pubDate>Tue, 22 Oct 2019 17:36:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598464#M172585</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2019-10-22T17:36:59Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598472#M172590</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/32733"&gt;@FreelanceReinh&lt;/a&gt;&amp;nbsp;you understood it correctly, thank you.&lt;/P&gt;</description>
      <pubDate>Tue, 22 Oct 2019 18:02:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598472#M172590</guid>
      <dc:creator>Ivan555</dc:creator>
      <dc:date>2019-10-22T18:02:33Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598503#M172597</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/272354"&gt;@Ivan555&lt;/a&gt;:&lt;/P&gt;
&lt;P&gt;As I understand, your definition of duplicates is the names whose dot-delimited parts form the same circular queue. If so, we need to:&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;find the largest (or smallest) part&lt;/LI&gt;
&lt;LI&gt;rotate the queue until this part ends up at the end (or beginning) of the name&lt;/LI&gt;
&lt;LI&gt;if a subsequent name formed in such a manner is the same as previous, ignore the record&lt;/LI&gt;
&lt;/OL&gt;
&lt;P&gt;(EDIT:&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp;has pointed out - correctly - that there's a hole in the original queue rotation logic when the largest chunk ends up at both the top and the bottom. The queue should be rotated further and stop when the largest chunk is on the top but not on the bottom. Below, a provision is also made to stop the loop from rotating infinitely if all the chunks in the string are the same. The test string &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp;used to show the flaw is included in the sample data set as record 0.)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;In other (SAS) words:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have ;                                                                                                                             
  input NUM NAME :$200. ;                                                                                                               
  cards ;                                                                                                                               
 0  281.281.3891.3891                                                                                                                   
 1  281.3891.3891.281                                                                                                                   
 2  3891.281.281.3891                                                                                                                   
 3  1162.5645.5645.500835.500835.1162                                                                                                   
 4  5645.500835.500835.1162.1162.5645                                                                                                   
 5  500835.1162.1162.5645.5645.500835                                                                                                   
 6  1349.1162.1162.5645.5645.500835.500835.1349                                                                                         
 7  1162.5645.5645.500835.500835.1349.1349.1162                                                                                         
 8  5645.500835.500835.1349.1349.1162.1162.5645                                                                                         
 9  500835.1349.1349.1162.1162.5645.5645.500835                                                                                         
10  5645.1162.1162.500835.500835.5645                                                                                                   
11  1162.500835.500835.5645.5645.1162                                                                                                   
12  500835.5645.5645.1162.1162.500835                                                                                                   
;                                                                                                                                       
run ;                                                                                                                                   
                                                                                                                                        
data want (drop = _:) ;                                                                                                                 
  if _n_ = 1 then do ;                                                                                                                  
    dcl hash h () ;                                                                                                                     
    h.definekey ("_nm") ;                                                                                                               
    h.definedone () ;                                                                                                                   
  end ;                                                                                                                                 
  set have ;                                                                                                                            
  length _tm _t $ 16 ;                                                                                                                  
  _nm = name ;                                                                                                                          
  do _n_ = 1 to countw (_nm) ;                                                                                                          
    _t = scan (_nm, _n_) ;                                                                                                              
    if _t &amp;gt; _tm then _tm = _t ;                                                                                                         
  end ;                                                                                                                                 
  do _n_ = 1 to countw (_nm) - 1 while (not (scan (_nm, 1) &amp;lt; _t = _tm)) ;                                                               
    _nm = catx (".", substr (_nm, findc (_nm, ".") + 1), scan (_nm, 1)) ;                                                               
    _t = scan (_nm, -1) ;                                                                                                               
  end ;                                                                                                                                 
  if h.check() ne 0 ;                                                                                                                   
  h.add() ;                                                                                                                             
run ;                                              
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;As&amp;nbsp;a result, you get the records 0, 3, 6, 10.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;Paul D.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 00:17:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598503#M172597</guid>
      <dc:creator>hashman</dc:creator>
      <dc:date>2019-10-23T00:17:19Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598536#M172612</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/21262"&gt;@hashman&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Your logic fails for record #2 below.&lt;/P&gt;
&lt;P&gt;I amended it slightly to generate a better sorted comparison string.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE ;                                                                                                                             
  input NUM NAME :$200. ; 
  cards ;                                                                                                                               
 1 281.3891.3891.281                                                                                                                   
 2 281.281.3891.3891                                                                                                                   
 3 1162.5645.5645.500835.500835.1162                                                                                                   
 4 5645.500835.500835.1162.1162.5645                                                                                                   
 5 500835.1162.1162.5645.5645.500835                                                                                                   
 6 1349.1162.1162.5645.5645.500835.500835.1349                                                                                         
 7 1162.5645.5645.500835.500835.1349.1349.1162                                                                                         
 8 5645.500835.500835.1349.1349.1162.1162.5645                                                                                         
 9 500835.1349.1349.1162.1162.5645.5645.500835                                                                                         
10 5645.1162.1162.500835.500835.5645                                                                                                   
11 1162.500835.500835.5645.5645.1162                                                                                                   
12 500835.5645.5645.1162.1162.500835                                                                                                   
run;  

data WANT (drop = _:) ;                                                                                                                 
  if _N_ = 1 then do ;                                                                                                                  
    dcl hash H () ;                                                                                                                     
    H.definekey ("_NM") ;                                                                                                               
    H.definedone () ;                                                                                                                   
  end ;                                                                                                                                 
  set HAVE ;                                                                                                                            
  length _TM _T $16 ;                                                                                                                  
  _NM = NAME ;                                                                                                                          
  do _N_ = 1 to countw (_NM) ;                                                                                                          
    _T = scan (_NM, _N_) ;                                                                                                              
    if _T &amp;gt; _TM then _TM = _T ;                                                                                                         
  end ;                                                                                                                           
  do until (scan (_NM, 1) ^= _TM &amp;amp; scan (_NM, -1) = _TM ) ;                                                                                                                
    _T = scan (_NM, 1) ;                                                                                                                
    _NM = catx ('.', substr (_NM, findc (_NM, ".") + 1), _T) ;                                                                       
  end ;                                                                                                                                 
  if H.check() ne 0 ;                                                                                                                   
  H.add() ;                                                                                                                             
run ;          
 &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 22 Oct 2019 21:34:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598536#M172612</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2019-10-22T21:34:02Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598565#M172629</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;:&lt;/P&gt;
&lt;P&gt;Thanks for pointing it out and amending the code!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The logic is fundamentally correct but I've &lt;EM&gt;underrotated&lt;/EM&gt; the queue with respect to possible dupes in the string itself. The rotation should stop when the largest chunk is atop the queue and not at the bottom at the same time.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note that we had better make a provision to avoid the infinite loop in case all chunks are the same. I'll include it in the amendment to my original reply.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;Paul D.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 00:06:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598565#M172629</guid>
      <dc:creator>hashman</dc:creator>
      <dc:date>2019-10-23T00:06:58Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598588#M172635</link>
      <description>&lt;P&gt;The following was added later after reading&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/21262"&gt;@hashman&lt;/a&gt;&amp;nbsp;'s comment: The code below will group all permutations of identical elements together. It will not distinguish between different chains with identical elements.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The approach below reads the source string into an array, sorts the array and then creates an MD5 digest value as the group key.&lt;/P&gt;
&lt;P&gt;The 2nd part of the code then uses a hash table to only keep the first instance per group key. You could also skip this step and replace it with a&amp;nbsp;&lt;EM&gt;proc sort nodupkey&lt;/EM&gt;.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want /*(drop=_:)*/;
  set have;
  length _GroupKey $32;
  array _words {20} $10. _temporary_;
  call missing(of _words[*]);

  /* create variable _GroupKey */
  do _i=1 to dim(_words);
    _words[_i]=scan(name,_i,'.');
    if missing(_words[_i]) then leave;
  end;
  call sortc(of _words[*]);
  _GroupKey=put(md5(catx('|',of _words[*])),hex32.);

  /* keep only first instance per _GroupKey */
  if _n_=1 then
    do;
      dcl hash h1();
      h1.defineKey('_GroupKey');
      h1.defineDone();
    end;
  if h1.check() ne 0 then
    do;
      output;
      h1.add();
    end;
run ;

proc print;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Or here is another option, which will work for any number of words in the source string.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
data want /*(drop=_:)*/;

  set have;
  length _NameSorted $32 _word $20;

  /* create and populate variable _NameSorted */
  if _n_=1 then
    do;
      dcl hash h1(ordered:'y', multidata:'y', hashexp:4);
      dcl hiter hh1('h1');
      h1.defineKey('_word');
      h1.defineData('_word');
      h1.defineDone();
    end;
  h1.clear();

  do _i=1 by 1 until(missing(_word));
    _word=scan(name,_i,'.');
    h1.add();
  end;

  _rc = hh1.first();
  do while(_rc = 0);
    _NameSorted=catx('.',_NameSorted,_word);
    _rc = hh1.next();
  end;

  /* keep only first instance per _NameSorted */
  if _n_=1 then
    do;
      dcl hash h2();
      h2.defineKey('_NameSorted');
      h2.defineDone();
    end;
  if h2.check() ne 0 then
    do;
      output;
      h2.add();
    end;
run ;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Oct 2019 00:05:56 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598588#M172635</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2019-10-25T00:05:56Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorithm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598602#M172640</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12447"&gt;@Patrick&lt;/a&gt;:&lt;BR /&gt;1. The problem with the sorting approach (regardless of how the sorting is done) is that it can map two strings which aren't duplicates per OP's definition the same way and, therefore, kill more records than needed. For example, the strings (case #1):&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;[1.2.3] &amp;nbsp;[3.1.2] &amp;nbsp;[2.3.1]&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;are duplicates of each other since they can be transformed into one another by moving the front item to the back a number of times. In other words, each string above represents the same circular queue. By contrast, the string (case #2):&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;[2.1.3]&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;isn't a duplicate of any of the case #1 strings because the items 1 and 2 are transposed, and so no circular rotation of its items can transform it into any of the case #1 strings. For [2.1.3], the duplicates are [3.2.1] and [1.3.2] &amp;nbsp;- again, all three belong to the same circular queue; but it's different from the queue in case #1.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Since sorting involves transpositions, it would map all of the above strings to [1.2.3] and thus render them all duplicate, scrambling the distinction between the cases #1 and #2. This is why the approach I've offered is to rotate the queue until the largest item becomes rightmost. After this is done, any string for case #1 becomes [1.2.3], while the strings for case #2 map to [2.1.3]. Hence, the hash table considers all the strings from case #1 duplicate, and so it does for all the strings in case #2; but it sees no duplicates between the cases. &amp;nbsp; &amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If strings should contain duplicate items, the rotating algorithm above may run into a snag pointed out by &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;. For example, if we have [1.2.3.3] and [3.3.1.2], the first rotation of the latter results in [3.1.2.3]. If, per the algorithm above, we stop here, the hash table will consider [1.2.3.3] and [3.1.2.3] different. To circumvent this, we need to rotate until the largest item is on the right, and yet it's not the same as the item on the left. Rotating [3.1.2.3] one more time achieves that by moving 3 from the left to the right resulting in [1.2.3.3]. Thus, by stipulating the criterion that we rotate until the largest item is on the right, and yet it's not the same as the item on the left, we always map &lt;EM&gt;the same circular queue&lt;/EM&gt; regardless of the arrangement of its items to the same surrogate key. &amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There remains a case where rotating a queue in this manner results in an infinite loop - namely, when all the items are the same, and so UNTIL or WHILE intended to stop the loop when the leftmost and rightmost items become different can never evaluate as true. This can be worked around by either (a) not looping at all if all the items are the same (there are plenty of ways to ascertain that beforehand) or (b) always stopping the loop after it has iterated N(number of items)-1 times (as iterating one more time results in the string as it was before the rotation).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2. I'm all for using an MD5 digest to reduce the hash entry length. But why make it $32 instead of $16 (which is what it is)? If this is just for the convenience of seeing only the hex characters rather than "garbage" and unprintable characters, we can simply attach the $hex32. format to the variable in question. &amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;Paul D.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 04:19:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598602#M172640</guid>
      <dc:creator>hashman</dc:creator>
      <dc:date>2019-10-23T04:19:18Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598604#M172641</link>
      <description>&lt;P&gt;A graph is defined both by nodes and links. For two graphs to be equal, they must have the same nodes and links. Here is a simple way to ensure that:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE ;                                                                                                                             
  input NUM NAME :$200. ; 
  cards ;                                                                                                                               
 1 281.3891.3891.281                                                                                                                   
 2 281.281.3891.3891                                                                                                                   
 3 1162.5645.5645.500835.500835.1162                                                                                                   
 4 5645.500835.500835.1162.1162.5645                                                                                                   
 5 500835.1162.1162.5645.5645.500835                                                                                                   
 6 1349.1162.1162.5645.5645.500835.500835.1349                                                                                         
 7 1162.5645.5645.500835.500835.1349.1349.1162                                                                                         
 8 5645.500835.500835.1349.1349.1162.1162.5645                                                                                         
 9 500835.1349.1349.1162.1162.5645.5645.500835                                                                                         
10 5645.1162.1162.500835.500835.5645                                                                                                   
11 1162.500835.500835.5645.5645.1162                                                                                                   
12 500835.5645.5645.1162.1162.500835
13 1234                                                                                             
;

data temp;
array c_{99} $16;
set have;
length key $400;
n = countw(name,".");
do i = 0 to n-1;
	c_{i+1} = catx("-", scan(name,i+1,"."), scan(name,mod(i+1,n)+1,"."));
	end;
call sortc(of c_{*});
key = catx(".", of c_{*});
drop n i c_:;
run;

proc sort data=temp out=want(drop=key) nodupkey; by key; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Note, I added a trivial case (13), just for safety.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 04:42:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598604#M172641</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2019-10-23T04:42:32Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598672#M172666</link>
      <description>&lt;P&gt;I'd like to suggest a variant of&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/21262"&gt;@hashman&lt;/a&gt;'s hash approach, adding all different cyclic permutations of the names from HAVE to the hash object (see, however, the less memory-consuming variant under "Edit 3"):&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want(drop=_:);
if _n_=1 then do;
  dcl hash h();
  h.definekey('_nm');
  h.definedone();
end;
set have;
_nm=name;
if h.check();
h.add();
do _i=1 to countw(_nm,'.')-1;
  _nm=catx('.',substr(_nm,findc(_nm,'.')+1),scan(_nm,1));
  h.ref();
end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I'm afraid to say that this variant, the original (i.e. the edited version after ChrisNZ's comment)&lt;SPAN class="DateTime lia-message-edited-on lia-component-common-widget-date"&gt;&lt;SPAN class="local-time"&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;and&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/462"&gt;@PGStats&lt;/a&gt;'s code yield &lt;EM&gt;three different results&lt;/EM&gt; for the below HAVE dataset; 629, 759 and 599 observations, respectively.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have(drop=i:);
length name $9;
do i1=1 to 5;
  do i2=1 to 5;
    do i3=1 to 5;
      do i4=1 to 5;
        do i5=1 to 5;
          name=catx('.',of i:);
          num+1;
          output;
        end;
      end;
    end;
  end;
end;
run; /* 5**5=3125 obs. */&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Edit: Comparing the three results, it appears that among the 759 obs. there are both '1.1.2.1.2' and '1.2.1.1.2' in spite of their equivalence and in the 599 obs. the equivalence class of '1.1.3.1.2' is not represented.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Edit 2: Evidence that a correct solution would have 629 observations can be found here: &lt;A href="http://oeis.org/A056665" target="_blank" rel="noopener"&gt;http://oeis.org/A056665&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Edit 3: Here's another variant, which saves only one representative per equivalence class in the hash object.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want(drop=_:);
if _n_=1 then do;
  dcl hash h();
  h.definekey('_nm');
  h.definedone();
end;
set have;
_nm=name;
_rc=h.check();
do _i=1 to countw(_nm)-1 while(_rc);
  _nm=catx('.',substr(_nm,findc(_nm,'.')+1),scan(_nm,1));
  _rc=h.check();
end;
if _rc;
h.add();
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2019 17:08:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598672#M172666</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2019-10-23T17:08:16Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598774#M172712</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/462"&gt;@PGStats&lt;/a&gt;:&lt;/P&gt;
&lt;P&gt;An interesting angle to look at the problem - and the corresponding code.&lt;/P&gt;
&lt;P&gt;As far as I'm concerned, the more angles, the merrier.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;Paul D.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 16:55:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598774#M172712</guid>
      <dc:creator>hashman</dc:creator>
      <dc:date>2019-10-23T16:55:12Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598788#M172717</link>
      <description>&lt;P&gt;After sleeping on it and reading &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/32733"&gt;@FreelanceReinh&lt;/a&gt;'s investigation, I realize that directed graphs can be traversed with many non-identical paths when they include sub-cycles, such as:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE ;                                                                                                                             
  input NUM NAME :$200. ; 
  cards ;                                                                                                                               
 1 1.2.3.1.4.5.1.6.7.1                                                                                                                   
 2 1.2.3.1.6.7.1.4.5.1                                                                                                                 
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;My code identifies both paths as identical, when they are not. &lt;img id="smileysad" class="emoticon emoticon-smileysad" src="https://communities.sas.com/i/smilies/16x16_smiley-sad.png" alt="Smiley Sad" title="Smiley Sad" /&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 17:44:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598788#M172717</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2019-10-23T17:44:24Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598792#M172720</link>
      <description>&lt;P&gt;Another approach using SQL:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as
select *
from have as a 
where not exists (
	select * from have as b
	where a.num &amp;gt; b.num and length(a.name) = length(b.name) and
		index(cats(".", b.name, ".", b.name,"."), cats(".", a.name, ".")) &amp;gt; 0);
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 23 Oct 2019 18:14:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598792#M172720</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2019-10-23T18:14:48Z</dc:date>
    </item>
    <item>
      <title>Re: non-trivial algorythm for char fields</title>
      <link>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598796#M172722</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/462"&gt;@PGStats&lt;/a&gt;:&lt;/P&gt;
&lt;P&gt;Wow ... finally a way of doing this purely in terms of sets without procedural code. A clever way to identify the cycles, too!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;Paul D.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 23 Oct 2019 18:48:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/non-trivial-algorithm-for-char-fields-arrays-sorting-etc/m-p/598796#M172722</guid>
      <dc:creator>hashman</dc:creator>
      <dc:date>2019-10-23T18:48:36Z</dc:date>
    </item>
  </channel>
</rss>

