<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Sub-String Theory: Reducing duplicate substrings to single substrings in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981355#M379088</link>
    <description>&lt;P&gt;Below is a possible approach.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines truncover;
  input have_string $200.;
  datalines;
Los Lunas/Corona/Corona/White Mountain/White Mountain
;

/* Rebuild every string so that each '/'-delimited term appears only once,
   keeping the terms in the order of their first appearance. */
data want(drop=_:);
  /* The lookup hash is created once and emptied again after every row. */
  if _n_=1 then do;
    length _term $50;
    declare hash seen();
    seen.defineKey('_term');
    seen.defineDone();
  end;

  set have;

  /* Never executed: it only makes want_string inherit the attributes of have_string. */
  if 0 then want_string=have_string;

  /* One more term than there are '/' separators. */
  _nterms=countc(have_string,'/')+1;
  do _pos=1 to _nterms;
    _term=scan(have_string,_pos,'/');
    /* check() returns 0 when the term is already stored, so skip it. */
    if seen.check()=0 then continue;
    want_string=catx('/',want_string,_term);
    _rc=seen.add();
  end;

  /* Start the next row with an empty set of seen terms. */
  _rc=seen.clear();
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1767052952578.png" style="width: 671px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/112451i14281179FDA8C661/image-dimensions/671x57?v=v2" width="671" height="57" role="button" title="Patrick_0-1767052952578.png" alt="Patrick_0-1767052952578.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 30 Dec 2025 00:02:56 GMT</pubDate>
    <dc:creator>Patrick</dc:creator>
    <dc:date>2025-12-30T00:02:56Z</dc:date>
    <item>
      <title>Sub-String Theory: Reducing duplicate substrings to single substrings</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981352#M379086</link>
      <description>&lt;P&gt;I’m faced with the challenge to modify over 100K concatenated strings of the type:&lt;/P&gt;
&lt;P&gt;“Los Lunas/Corona/Corona/White Mountain/White Mountain”&lt;/P&gt;
&lt;P&gt;and it should be noted that some source strings may contain 2, 3, or 4 each of the "Corona" or "White Mountain" sub-strings.&lt;/P&gt;
&lt;P&gt;The desired string would reduce the number of instances of&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;“White Mountain” and “Corona” to one each, and should look like this:&lt;/P&gt;
&lt;P&gt;“Los Lunas/Corona/White Mountain”&lt;/P&gt;
&lt;P&gt;I’ve attached the code I used to accomplish this (with a hat tip to blogs by Leonid Batkhan) that shows all the steps. &amp;nbsp;But surely there must be a simpler, more efficient, more elegant way to achieve the desired result.&amp;nbsp;&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="Apple-converted-space"&gt;Thanks in advance for any helpful suggestions or insights.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN class="Apple-converted-space"&gt;Happy Holidays!&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;Gene&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Reduce the repeated "Corona" and "White Mountain" entries of one
   '/'-delimited string to a single entry each. Every intermediate
   result is kept in mod1-mod12 so the individual steps can be inspected. */
data want;

concat_location="Los Lunas/Corona/Corona/White Mountain/White Mountain";

Ccount=count(concat_location,"Corona");
/* DO needs its own semicolon; without it the assignment that follows
   is read as part of the DO statement. */
if Ccount&amp;gt;1 then do;
  /* remove all but one "Corona" */
  mod1=prxchange('s/Corona//',Ccount-1,concat_location);
  /* translate every '/' to a blank */
  mod2=translate(mod1,' ','/');
  /* compress runs of blanks to a single blank */
  mod3=compbl(mod2);
  /* translate blanks back to '/' (this also converts the blanks inside names) */
  mod4=translate(mod3,'/',' ');
  /* trim leading '/': start at the first character that is not a '/' */
  P1=findc(mod4,'/','K');
  mod5=substr(mod4,P1);
  /* trim trailing '/': search backwards for the last character that is not a '/' */
  P2=findc(mod5,'/','K',-length(mod5));
  mod6=substr(mod5,1,P2);
end;
/* NOTE: mod6 is only assigned inside the block above, so a string with
   fewer than two "Corona" entries is not carried into the next section. */

/* restore the blank that the blank-to-'/' translation put inside the name */
if find(mod6,'White/Mountain') then
  mod6=tranwrd(mod6,"White/Mountain","White Mountain");

Wcount=count(mod6,"White Mountain");
if Wcount&amp;gt;1 then do;
  /* remove all but one "White Mountain" */
  mod7=prxchange('s/White Mountain//',Wcount-1,mod6);
  /* translate every '/' to a blank */
  mod8=translate(mod7,' ','/');
  /* compress runs of blanks to a single blank */
  mod9=compbl(mod8);
  /* translate blanks back to '/' */
  mod10=translate(mod9,'/',' ');
  /* trim leading '/' */
  P1=findc(mod10,'/','K');
  mod11=substr(mod10,P1);
  /* trim trailing '/': measure the string being trimmed (mod11), not mod5 */
  P2=findc(mod11,'/','K',-length(mod11));
  mod12=substr(mod11,1,P2);
end;

/* restore the blanks inside the multi-word names */
if find(mod12,"White/Mountain") then
  mod12=tranwrd(mod12,"White/Mountain","White Mountain");
if find(mod12,"Los/Lunas") then
  want=tranwrd(mod12,"Los/Lunas","Los Lunas");
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 29 Dec 2025 22:08:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981352#M379086</guid>
      <dc:creator>genemroz</dc:creator>
      <dc:date>2025-12-29T22:08:30Z</dc:date>
    </item>
    <item>
      <title>Re: Sub-String Theory: Reducing duplicate substrings to single substrings</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981353#M379087</link>
      <description>&lt;P&gt;How about this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Step 1: split the string into one row per '/'-delimited word and keep
   the position (i) so the original order can be restored later. */
data want;
  keep word i;
  concat_location = "Los Lunas/Corona/Corona/White Mountain/White Mountain";
  do i = 1 to countw(concat_location,"/");
    word = scan(concat_location, i, "/");
    output;
  end;
run;

/* Step 2: drop duplicate words. EQUALS keeps rows with the same word in
   their input order, so NODUPKEY retains the first occurrence of each. */
proc sort data = want nodupkey equals;
  by word;
run;

/* Step 3: sorting by word left the rows in alphabetical order; go back to
   the order in which the words first appeared in the string. */
proc sort data = want;
  by i;
run;

/* Step 4: glue the remaining words back together and write a single row. */
data want;
  keep concat_location_dedup;
  set want end = eof;
  retain concat_location_dedup;
  length concat_location_dedup $ 1024;
  concat_location_dedup = catx("/", concat_location_dedup, word);
  if eof;
run;&lt;/CODE&gt;&lt;/PRE&gt;
      <pubDate>Mon, 29 Dec 2025 23:57:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981353#M379087</guid>
      <dc:creator>SASKiwi</dc:creator>
      <dc:date>2025-12-29T23:57:27Z</dc:date>
    </item>
    <item>
      <title>Re: Sub-String Theory: Reducing duplicate substrings to single substrings</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981355#M379088</link>
      <description>&lt;P&gt;Below is a possible approach.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines truncover;
  input have_string $200.;
  datalines;
Los Lunas/Corona/Corona/White Mountain/White Mountain
;

/* Rebuild every string so that each '/'-delimited term appears only once,
   keeping the terms in the order of their first appearance. */
data want(drop=_:);
  /* The lookup hash is created once and emptied again after every row. */
  if _n_=1 then do;
    length _term $50;
    declare hash seen();
    seen.defineKey('_term');
    seen.defineDone();
  end;

  set have;

  /* Never executed: it only makes want_string inherit the attributes of have_string. */
  if 0 then want_string=have_string;

  /* One more term than there are '/' separators. */
  _nterms=countc(have_string,'/')+1;
  do _pos=1 to _nterms;
    _term=scan(have_string,_pos,'/');
    /* check() returns 0 when the term is already stored, so skip it. */
    if seen.check()=0 then continue;
    want_string=catx('/',want_string,_term);
    _rc=seen.add();
  end;

  /* Start the next row with an empty set of seen terms. */
  _rc=seen.clear();
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1767052952578.png" style="width: 671px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/112451i14281179FDA8C661/image-dimensions/671x57?v=v2" width="671" height="57" role="button" title="Patrick_0-1767052952578.png" alt="Patrick_0-1767052952578.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 30 Dec 2025 00:02:56 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981355#M379088</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2025-12-30T00:02:56Z</dc:date>
    </item>
    <item>
      <title>Re: Sub-String Theory: Reducing duplicate substrings to single substrings</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981356#M379089</link>
      <description>&lt;P&gt;Just make a NEW list.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
 old="Los Lunas/Corona/Corona/White Mountain/White Mountain";
run;

/* Build NEW from OLD by appending each '/'-delimited term only when NEW
   does not already contain it, so the first-appearance order is kept. */
data want ;
  set have;
* trick to force data step to define NEW as same length as OLD ;
  new=old;
* Prime the pump with the first word ;
  new=scan(old,1,'/');
* when next word is not in NEW then append it ;
  /* Count the terms with the same '/' delimiter that SCAN, FINDW and CATX
     use; the default COUNTW delimiters also split on blanks and would make
     the loop run past the last term. */
  do i=2 to countw(old,'/');
    /* the i modifier ignores case, the t modifier trims trailing delimiters */
    if not findw(new,scan(old,i,'/'),'/','it') then
      new=catx('/',new,scan(old,i,'/'));
  end;
  drop i;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Results&lt;/P&gt;
&lt;PRE&gt; 90         data _null_;
 91           set want;
 92           put (_all_) (=/);
 93         run;
 
 old=Los Lunas/Corona/Corona/White Mountain/White Mountain
 new=Los Lunas/Corona/White Mountain
 NOTE: There were 1 observations read from the data set WORK.WANT.&lt;/PRE&gt;</description>
      <pubDate>Tue, 30 Dec 2025 00:31:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981356#M379089</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2025-12-30T00:31:17Z</dc:date>
    </item>
    <item>
      <title>Re: Sub-String Theory: Reducing duplicate substrings to single substrings</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981374#M379096</link>
      <description>&lt;P&gt;Once again, after struggling for hours over what seemed an intractable puzzle, all I have to do is post a question here, and three better solutions appear in short order. &amp;nbsp;I'll mark Tom's as the Accepted because it seems to be the cleverest and also because the only thing I know about hash is off-topic. &amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks and Happy Holidays to all!&lt;/P&gt;
&lt;P&gt;Gene&lt;/P&gt;</description>
      <pubDate>Tue, 30 Dec 2025 16:02:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Sub-String-Theory-Reducing-duplicate-substrings-to-single/m-p/981374#M379096</guid>
      <dc:creator>genemroz</dc:creator>
      <dc:date>2025-12-30T16:02:26Z</dc:date>
    </item>
  </channel>
</rss>

