<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Compare two variables and find any partial match in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879211#M82760</link>
    <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/443370"&gt;@chelm24&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;, I need to determine the words that match between 2 variables and not by score. Partial match at least &amp;gt;= 4 letters&amp;nbsp;&lt;SPAN&gt;the same in sequence.&lt;/SPAN&gt;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Exact match of WORD OR exact match of any string of 4 characters within the same WORD.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Above two options should be doable BUT if you go for the option with 4 characters it could then be any two words as long as they share a sequence of 4 identical characters.&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 05 Jun 2023 07:45:34 GMT</pubDate>
    <dc:creator>Patrick</dc:creator>
    <dc:date>2023-06-05T07:45:34Z</dc:date>
    <item>
      <title>Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879197#M82757</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I require assistance with comparing two variables in SAS and determining if there are any partial record matches.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data have;
length VAR1 $100 VAR2 $100;
input VAR1 $ VAR2 $;
infile datalines dlm='|';
datalines;
1.DR. MORRISON|
1.MORRISON| ABCFG MORRISON
1.DR. MORRISON| MORRISON
1.DR. MORRISON| DR. WRIGHT
1. LA HOSPITAL| SAN DIEGO
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;data want / Expected Result&lt;/STRONG&gt;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;VAR1&lt;/TD&gt;&lt;TD&gt;VAR2&lt;/TD&gt;&lt;TD&gt;PARTIAL MATCH?&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.MORRISON&lt;/TD&gt;&lt;TD&gt;ABCFG MORRISON&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;MORRISON&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;DR. WRIGHT&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1. LA HOSPITAL&lt;/TD&gt;&lt;TD&gt;SAN DIEGO&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;</description>
      <pubDate>Sun, 04 Jun 2023 23:55:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879197#M82757</guid>
      <dc:creator>chelm24</dc:creator>
      <dc:date>2023-06-04T23:55:35Z</dc:date>
    </item>
    <item>
      <title>Re: Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879198#M82758</link>
      <description>&lt;P&gt;Define your criteria for "partial match". 4 letters the same in sequence? 5? 6? Some other rule?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;There are several SAS functions, COMPGED, SPEDIS and COMPLEV, that will provide scores of spelling "distance", or a measure of similarity. I would try all three, and read the documentation, to select which seems to fit your data and need best. The lower the score returned, the more similar the two variables are.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have;
length VAR1 $100 VAR2 $100;
input VAR1 $ VAR2 $;
infile datalines dlm='|';
Compgedscore = compged(var1, var2);
Complevscore = complev(var1, var2);
Spedisscore  = spedis(var1, var2);
datalines;
1.DR. MORRISON|
1.MORRISON| ABCFG MORRISON
1.DR. MORRISON| MORRISON
1.DR. MORRISON| DR. WRIGHT
1. LA HOSPITAL| SAN DIEGO
;&lt;/PRE&gt;</description>
      <pubDate>Mon, 05 Jun 2023 01:19:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879198#M82758</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2023-06-05T01:19:37Z</dc:date>
    </item>
    <item>
      <title>Re: Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879201#M82759</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;, I need to determine the words that match between 2 variables and not by score. Partial match at least &amp;gt;= 4 letters&amp;nbsp;&lt;SPAN&gt;the same in sequence.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;VAR1&lt;/TD&gt;&lt;TD&gt;VAR2&lt;/TD&gt;&lt;TD&gt;PARTIAL MATCH?&lt;/TD&gt;&lt;TD&gt;MATCH&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.MORRISON&lt;/TD&gt;&lt;TD&gt;ABCFG MORRISON&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;TD&gt;MORRISON&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;MORRISON&lt;/TD&gt;&lt;TD&gt;YES&lt;/TD&gt;&lt;TD&gt;MORRISON&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1.DR. MORRISON&lt;/TD&gt;&lt;TD&gt;DR. WRIGHT&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1. LA HOSPITAL&lt;/TD&gt;&lt;TD&gt;SAN DIEGO&lt;/TD&gt;&lt;TD&gt;NO&lt;/TD&gt;&lt;TD&gt;&amp;nbsp;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Jun 2023 01:51:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879201#M82759</guid>
      <dc:creator>chelm24</dc:creator>
      <dc:date>2023-06-05T01:51:38Z</dc:date>
    </item>
    <item>
      <title>Re: Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879211#M82760</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/443370"&gt;@chelm24&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;, I need to determine the words that match between 2 variables and not by score. Partial match at least &amp;gt;= 4 letters&amp;nbsp;&lt;SPAN&gt;the same in sequence.&lt;/SPAN&gt;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Exact match of WORD OR exact match of any string of 4 characters within the same WORD.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Above two options should be doable BUT if you go for the option with 4 characters it could then be any two words as long as they share a sequence of 4 identical characters.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Jun 2023 07:45:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879211#M82760</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2023-06-05T07:45:34Z</dc:date>
    </item>
    <item>
      <title>Re: Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879232#M82761</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
length VAR1 $100 VAR2 $100;
input VAR1 $ VAR2 $;
infile datalines dlm='|';

if find(compress(var1,,'ka'),compress(var2,,'ka'),'i') or 
   find(compress(var2,,'ka'),compress(var1,,'ka'),'i') then MATCH='Yes' ;
  else MATCH='No ' ;

datalines;
1.DR. MORRISON|
1.MORRISON| ABCFG MORRISON
1.DR. MORRISON| MORRISON
1.DR. MORRISON| DR. WRIGHT
1. LA HOSPITAL| SAN DIEGO
;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 05 Jun 2023 11:35:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879232#M82761</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2023-06-05T11:35:30Z</dc:date>
    </item>
    <item>
      <title>Re: Compare two variables and find any partial match</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879237#M82762</link>
      <description>&lt;P&gt;Just test each word.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want ;
  set have;
  do i=1 to countw(var1,' ,.()-') until(found);
    word=scan(var1,i,' ,.()-');
    if length(word)&amp;gt;3 then found = 0&amp;lt;findw(var2,word,' ,.()-','it');
  end;
  if not found then do; 
     word=' ';
     i=0;
  end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;PRE&gt;Obs         VAR1         VAR2              i    found      word

 1     1.DR. MORRISON                      0      0
 2     1.MORRISON        ABCFG MORRISON    2      1      MORRISON
 3     1.DR. MORRISON    MORRISON          3      1      MORRISON
 4     1.DR. MORRISON    DR. WRIGHT        0      0
 5     1. LA HOSPITAL    SAN DIEGO         0      0
&lt;/PRE&gt;</description>
      <pubDate>Mon, 05 Jun 2023 12:33:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Compare-two-variables-and-find-any-partial-match/m-p/879237#M82762</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2023-06-05T12:33:32Z</dc:date>
    </item>
  </channel>
</rss>

