<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Compare strings with symbols from different alphabets in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249176#M46837</link>
    <description>&lt;P&gt;I want to compare two strings which contain symbols from different alphabets (e.g. Russian and English). I want symbols which look alike to be considered equal to each other.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;E.g. in the word "Mom" the letter "o" is from the English alphabet (code 006F in Unicode), and in the word "Mоm" the letter "о" is from the Russian alphabet (code 043E in Unicode). So &lt;CODE&gt;("Mom" = "Mоm")&lt;/CODE&gt; =&amp;gt; false, but I want it to be true. Is there some standard SAS function, or should I write a macro to do it?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks!&lt;/P&gt;</description>
    <pubDate>Wed, 10 Feb 2016 13:05:10 GMT</pubDate>
    <dc:creator>DmitryErshov</dc:creator>
    <dc:date>2016-02-10T13:05:10Z</dc:date>
    <item>
      <title>Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249176#M46837</link>
      <description>&lt;P&gt;I want to compare two strings which contain symbols from different alphabets (e.g. Russian and English). I want symbols which look alike to be considered equal to each other.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;E.g. in the word "Mom" the letter "o" is from the English alphabet (code 006F in Unicode), and in the word "Mоm" the letter "о" is from the Russian alphabet (code 043E in Unicode). So &lt;CODE&gt;("Mom" = "Mоm")&lt;/CODE&gt; =&amp;gt; false, but I want it to be true. Is there some standard SAS function, or should I write a macro to do it?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks!&lt;/P&gt;</description>
      <pubDate>Wed, 10 Feb 2016 13:05:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249176#M46837</guid>
      <dc:creator>DmitryErshov</dc:creator>
      <dc:date>2016-02-10T13:05:10Z</dc:date>
    </item>
    <item>
      <title>Re: Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249847#M47058</link>
      <description>&lt;P&gt;Can you use the KTranslate function to convert the non-first-7-bit characters to the 7-bit versions? So all of the "o" versions above 7F would first be converted, then do your compare (e.g. convert 043E to 006F).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Tom&lt;/P&gt;</description>
      <pubDate>Sat, 13 Feb 2016 03:34:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249847#M47058</guid>
      <dc:creator>TomKari</dc:creator>
      <dc:date>2016-02-13T03:34:22Z</dc:date>
    </item>
    <item>
      <title>Re: Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249869#M47061</link>
      <description>&lt;P&gt;Thanks a lot Tom!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;One more question. I want to check whether a letter belongs to the Russian alphabet. I can do it by direct comparison with Cyrillic letters:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;letter in ('А', 'Б', 'В', 'Г', 'Д', 'Ж', ...)&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there some simpler approach? E.g. for the English alphabet I could use the rank() function:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;rank('A') &amp;lt;= rank(letter) &amp;lt;= rank('z')&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;But this function doesn't work for UTF-8 encoding. How can I get the position of a letter in the UTF-8 table?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Regards,&lt;/P&gt;
&lt;P&gt;Dmitry&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 13 Feb 2016 12:27:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249869#M47061</guid>
      <dc:creator>DmitryErshov</dc:creator>
      <dc:date>2016-02-13T12:27:10Z</dc:date>
    </item>
    <item>
      <title>Re: Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249878#M47067</link>
      <description>&lt;P&gt;Hi, Dmitry&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm definitely getting onto thinner ice here, but since it looks like the Cyrillic characters are between x'0400' and x'0513', what if you just coded&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;'0400'x &amp;lt;= letter &amp;lt;= '0513'x&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm not set up to try internationalized stuff...give it a try and post back what happens.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Tom&lt;/P&gt;</description>
      <pubDate>Sat, 13 Feb 2016 15:31:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/249878#M47067</guid>
      <dc:creator>TomKari</dc:creator>
      <dc:date>2016-02-13T15:31:32Z</dc:date>
    </item>
    <item>
      <title>Re: Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/250263#M47173</link>
      <description>&lt;P&gt;Thanks Tom! I also coded some functions to deal with keyboard layout misprints. Here is the code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/***************************************************************************/
/* FUNCTION count_rus_letters RETURNS NUMBER OF CYRILLIC LETTERS IN STRING */
/***************************************************************************/ 
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION count_rus_letters(string $);
/* Walks STRING one character at a time (klength/ksubstr) and returns how */
/* many of its characters appear in the Russian letter list below.        */
/* The list holds both cases of every letter, including the letter Ё/ё.   */
/* The $2 buffer presumably relies on each listed letter occupying two    */
/* bytes in a UTF-8 session - TODO confirm for other session encodings.   */
length ch $2;

total=0;

do pos=1 to klength(string);
  ch=ksubstr(string,pos,1);
  if ch in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё",
            "Ж","ж","З","з","И","и","Й","й","К","к","Л","л","М","м",
            "Н","н","О","о","П","п","Р","р","С","с","Т","т","У","у",
            "Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
            "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я")
  then total=total+1;
end;

return(total);
endsub;
run;

/**************************************************************************/
/* FUNCTION count_eng_letters RETURNS NUMBER OF ENGLISH LETTERS IN STRING */
/**************************************************************************/ 
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION count_eng_letters(string $);
/* Returns the number of characters of STRING that are basic Latin        */
/* letters, i.e. A-Z or a-z. STRING is scanned one character at a time    */
/* with klength/ksubstr.                                                  */
length letter $2;

eng_count=0;

len=klength(string);

do i=1 to len;
  letter=ksubstr(string,i,1);
  /* rank() is evaluated once per character and reused for both ranges.   */
  letter_rank=rank(letter);
  /* The two letter ranges are tested separately: one A..z range would    */
  /* also count the six ASCII punctuation characters that sit between Z   */
  /* and a (brackets, backslash, caret, underscore, backtick).            */
  if (rank('A') &amp;lt;= letter_rank &amp;lt;= rank('Z'))
     or (rank('a') &amp;lt;= letter_rank &amp;lt;= rank('z'))
  then eng_count+1;
end;

return(eng_count);
endsub;
run;

/**************************************************************************/
/* FUNCTION is_string_russian RETURNS 1 IF NUMBER OF RUSSIAN SYMBOLS IN   */
/* STRING &amp;gt;= NUMBER OF ENGLISH SYMBOLS                                    */
/**************************************************************************/ 
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION is_string_russian(string $);
/* Returns 1 when STRING holds at least as many Russian letters as basic  */
/* Latin letters (A-Z, a-z), and 0 otherwise. A tie - including a string  */
/* with no letters of either kind - yields 1 because the test is &amp;gt;=.     */
length letter $2 result 8;

eng_count=0;
rus_count=0;

len=klength(string);

do i=1 to len;
  letter=ksubstr(string,i,1);
  if letter in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё","Ж","ж",
      "З","з","И","и","Й","й","К","к","Л","л","М","м","Н","н","О","о","П","п","Р","р",
      "С","с","Т","т","У","у","Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
      "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я")
  then rus_count+1;
  letter_rank=rank(letter);
  /* The two letter ranges are tested separately: one A..z range would    */
  /* also count the six ASCII punctuation characters that sit between Z   */
  /* and a, inflating the English count.                                  */
  if (rank('A') &amp;lt;= letter_rank &amp;lt;= rank('Z'))
     or (rank('a') &amp;lt;= letter_rank &amp;lt;= rank('z'))
  then eng_count+1;
end;

if rus_count&amp;gt;=eng_count
then result=1;
else result=0;

return(result);
endsub;
run;

/**************************************************************************/
/* FUNCTION fix_layout_misprints REPLACES MISPRINTED SYMBOLS BY ANALYSING */
/* LANGUAGE OF THE STRING (FOR ENGLISH STRING RUSSIAN SYMBOLS ARE         */
/* REPLACED BY ENGLISH COPIES AND FOR RUSSIAN STRING SYMBOLS ARE          */
/* REPLACED BY RUSSIAN COPIES)                                            */
/**************************************************************************/ 
proc fcmp outlib=sasuser.userfuncs.mystring;
FUNCTION fix_layout_misprints(string $) $ 1000;
/* Counts the Russian and the basic Latin (A-Z, a-z) letters of STRING    */
/* and returns a copy in which the look-alike letters of the minority     */
/* alphabet are replaced by their counterparts from the majority one.     */
/* A tie is treated as Russian because the test is &amp;gt;=. The result is     */
/* declared as $1000.                                                     */
length letter $2 result $1000;

eng_count=0;
rus_count=0;

len=klength(string);

do i=1 to len;
  letter=ksubstr(string,i,1);
  if letter in ("А","а","Б","б","В","в","Г","г","Д","д","Е","е","Ё","ё","Ж","ж",
      "З","з","И","и","Й","й","К","к","Л","л","М","м","Н","н","О","о","П","п","Р","р",
      "С","с","Т","т","У","у","Ф","ф","Х","х","Ц","ц","Ч","ч","Ш","ш","Щ","щ","Ъ","ъ",
      "Ы","ы","Ь","ь","Э","э","Ю","ю","Я","я")
  then rus_count+1;
  letter_rank=rank(letter);
  /* The two letter ranges are tested separately: one A..z range would    */
  /* also count the six ASCII punctuation characters that sit between Z   */
  /* and a, which could flip the language decision below.                 */
  if (rank('A') &amp;lt;= letter_rank &amp;lt;= rank('Z'))
     or (rank('a') &amp;lt;= letter_rank &amp;lt;= rank('z'))
  then eng_count+1;
end;

/* ktranslate(source, to, from): the second argument lists the            */
/* replacement characters, the third the characters to be replaced.       */
if rus_count&amp;gt;=eng_count
then result=ktranslate(string,"АаВЕеКкМОоРрСсТХх","AaBEeKkMOoPpCcTXx");
else result=ktranslate(string,"AaBEeKkMOoPpCcTXx","АаВЕеКкМОоРрСсТХх");

return(result);
endsub;
run;

/***********/
/* EXAMPLE */
/***********/
/* Make the functions stored in sasuser.userfuncs callable from the DATA step. */
options cmplib=sasuser.userfuncs;
data _null_;
/* err_str is meant to differ from good_str by a single look-alike letter */
/* typed in the other keyboard layout; fixed_str is the repaired copy.    */
good_str="Иванов";
err_str="Ивaнов";
fixed_str=fix_layout_misprints(err_str);

put "Good string=" good_str;
put "Error string=" err_str;
put "Fixed string=" fixed_str;

rus_count_in_err=count_rus_letters(err_str);
put "Count of Cyrillic symbols in error string=" rus_count_in_err;

eng_count_in_err=count_eng_letters(err_str);
put "Count of English symbols in error string=" eng_count_in_err;

is_error_str_russian=is_string_russian(err_str);
put "Is error string language Russian=" is_error_str_russian;

/* The raw strings are expected to differ; after the fix they should match. */
if (good_str ne err_str) 
then put "Before clearing - strings are not equal to each other";

if (good_str = fixed_str) 
then put "After clearing - strings are equal to each other";
run;&lt;/CODE&gt;&amp;nbsp;&lt;/PRE&gt;</description>
      <pubDate>Tue, 16 Feb 2016 10:26:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/250263#M47173</guid>
      <dc:creator>DmitryErshov</dc:creator>
      <dc:date>2016-02-16T10:26:10Z</dc:date>
    </item>
    <item>
      <title>Re: Compare strings with symbols from different alphabets</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/250371#M47211</link>
      <description>&lt;P&gt;Very cool!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Tom&lt;/P&gt;</description>
      <pubDate>Tue, 16 Feb 2016 16:21:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Compare-strings-with-symbols-from-different-alphabets/m-p/250371#M47211</guid>
      <dc:creator>TomKari</dc:creator>
      <dc:date>2016-02-16T16:21:04Z</dc:date>
    </item>
  </channel>
</rss>

