<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Remove similar names in a column in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821770#M324449</link>
    <description>&lt;P&gt;What are your criteria for "similar names"?&lt;/P&gt;</description>
    <pubDate>Wed, 06 Jul 2022 03:39:22 GMT</pubDate>
    <dc:creator>mkeintz</dc:creator>
    <dc:date>2022-07-06T03:39:22Z</dc:date>
    <item>
      <title>Remove similar names in a column</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821729#M324420</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a SAS table column which contains some SAS file names extracted from a directory (see some examples below). I want to keep only one row for similar names, but I am not sure how to do that. For example, the first three rows must be replaced by only one row, which can be the first row, 0-2004-editcacacon.sas.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;0-2004-editcacacon.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;0-2005-editcacacon.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;0-2112-editcacacon.sas&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;TABLE border="0" cellspacing="0" cellpadding="0"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;1.1 sald-etudettsald_2018.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.1 sald-etudettsald_2020.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.1 sald-etudettsald_2020-base 2018.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.1 vie-etudettvie_2018.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.1 vie-etudettvie_2020.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.1 vie-etudettvie_2020-qx_anc.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.2 sald-kkldmajtab_2018.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.2 sald-kkldmajtab_2020.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.2 vie- kkviemajtab_2018.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.2 vie-kkviemajtab_2020.sas&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.2 vie-kkviemajtab_2020-qx_anc.sas&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;</description>
      <pubDate>Tue, 05 Jul 2022 21:45:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821729#M324420</guid>
      <dc:creator>afsand</dc:creator>
      <dc:date>2022-07-05T21:45:24Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similar names in a column</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821770#M324449</link>
      <description>&lt;P&gt;What are your criteria for "similar names"?&lt;/P&gt;</description>
      <pubDate>Wed, 06 Jul 2022 03:39:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821770#M324449</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2022-07-06T03:39:22Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similar names in a column</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821780#M324455</link>
      <description>&lt;P&gt;Start by writing down all rules that identify similar values.&lt;/P&gt;</description>
      <pubDate>Wed, 06 Jul 2022 06:39:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821780#M324455</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2022-07-06T06:39:11Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similar names in a column</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821942#M324519</link>
      <description>&lt;P&gt;A simple way would be to extract a long string of letters (at least 8 long, for example) from the file names and only keep unique ones. Using a regular expression, for example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
/* Sample file names, one per data line, read into the character variable txt. */
length txt $60;
/* The &amp; modifier on the input statement allows single embedded blanks inside a value. */
input txt &amp;;
datalines;
0-2004-editcacacon.sas
0-2005-editcacacon.sas
0-2112-editcacacon.sas
1.1 sald-etudettsald_2018.sas
1.1 sald-etudettsald_2020.sas
1.1 sald-etudettsald_2020-base 2018.sas
1.1 vie-etudettvie_2018.sas
1.1 vie-etudettvie_2020.sas
1.1 vie-etudettvie_2020-qx_anc.sas
1.2 sald-kkldmajtab_2018.sas
1.2 sald-kkldmajtab_2020.sas
1.2 vie- kkviemajtab_2018.sas
1.2 vie-kkviemajtab_2020.sas
1.2 vie-kkviemajtab_2020-qx_anc.sas
9.9 2002 pouet-pouet.sas
;

/* For each file name in test, extract the first run of 8 or more lowercase letters into the key variable w. */
data temp;
/* Compile the pattern on the first iteration only; the sum statement (id + ...) keeps id retained across iterations. */
if _n_=1 then id + prxparse("/[a-z]{8,}/"); /* minimum word length = 8 */
set test;
/* pos and len receive the start position and length of the first match; pos is 0 when nothing matches. */
call prxsubstr(id, txt, pos, len);
length w $60;
/* Matched: the key is the matched substring. Not matched: build a per-row placeholder from _n_ so such rows do not share a key. */
if pos then w = substr(txt, pos, len);
else w = catx(" ", "No key", _n_);
/* Working variables are not written to temp. */
drop id len pos;
run;

/* Sort by the extracted key w and keep one row per key (nodupkey); w itself is dropped from the output table want. */
proc sort data=temp out=want(drop=w) nodupkey; by w; run;

/* List the remaining file names without the observation-number column. */
proc print data=want noobs; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="PGStats_0-1657142680673.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/73011i270CD0CE10DC5AD7/image-size/medium?v=v2&amp;amp;px=400" role="button" title="PGStats_0-1657142680673.png" alt="PGStats_0-1657142680673.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 06 Jul 2022 21:28:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/821942#M324519</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2022-07-06T21:28:22Z</dc:date>
    </item>
    <item>
      <title>Re: Remove similar names in a column</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/822043#M324577</link>
      <description>&lt;P&gt;It is really not easy. Here is some code you could start with.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
/* Sample file names, one per data line, read into the character variable txt. */
length txt $60;
/* The &amp; modifier on the input statement allows single embedded blanks inside a value. */
input txt &amp;;
datalines;
0-2004-editcacacon.sas
0-2005-editcacacon.sas
0-2112-editcacacon.sas
1.1 sald-etudettsald_2018.sas
1.1 sald-etudettsald_2020.sas
1.1 sald-etudettsald_2020-base 2018.sas
1.1 vie-etudettvie_2018.sas
1.1 vie-etudettvie_2020.sas
1.1 vie-etudettvie_2020-qx_anc.sas
1.2 sald-kkldmajtab_2018.sas
1.2 sald-kkldmajtab_2020.sas
1.2 vie- kkviemajtab_2018.sas
1.2 vie-kkviemajtab_2020.sas
1.2 vie-kkviemajtab_2020-qx_anc.sas
9.9 2002 pouet-pouet.sas
;

/* Pair every name in test with every name in test (a self-join with no join condition, so each name is also paired with itself). */
/* compress(..., , 'pds') removes punctuation, digits and space characters before spedis computes the spelling distance. */
/* Only pairs with a distance below 50 are kept, ordered by the first name and then by distance. */
proc sql;
create table have as
select a.txt as a_txt,b.txt as b_txt ,spedis(compress(a.txt,,'pds'),compress(b.txt,,'pds')) as distance
 from test as a,test as b
  where calculated distance&amp;lt;50
  order by a.txt,distance ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 07 Jul 2022 11:57:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Remove-similar-names-in-a-column/m-p/822043#M324577</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2022-07-07T11:57:03Z</dc:date>
    </item>
  </channel>
</rss>

