<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Regular expression for Unicode letters in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108452#M22540</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Would ANYALPHA help.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2818&amp;nbsp; data _null_;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2819&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; a='0áñéúáóíÁÑçâèôïéêëààñÉ';&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2820&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; b=anyalpha(a);&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2821&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; put _all_;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2822&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt; &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;a=0áñéúáóíÁÑçâèôïéêëààñÉ b=2 _ERROR_=0 _N_=1&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Tue, 08 May 2012 23:34:03 GMT</pubDate>
    <dc:creator>data_null__</dc:creator>
    <dc:date>2012-05-08T23:34:03Z</dc:date>
    <item>
      <title>Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108451#M22539</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I want to match all English and international letters like áñéúáóíÁÑçâèôïéêëààñÉ, but I don't want to match the underscore and whatever else regex considers to be part of a "word."&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;In the following article, I tried \p{L} but it doesn't work in SAS 9.2&lt;/P&gt;&lt;P&gt;&lt;A href="http://stackoverflow.com/questions/2392194/how-to-match-the-international-alphabet-english-a-z-non-english-with-a-regu" title="http://stackoverflow.com/questions/2392194/how-to-match-the-international-alphabet-english-a-z-non-english-with-a-regu"&gt;regex - How to match the international alphabet (English a-z, + non English) with a regular expression? - Stack Overflow&lt;/A&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 08 May 2012 22:50:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108451#M22539</guid>
      <dc:creator>AndrewZ</dc:creator>
      <dc:date>2012-05-08T22:50:34Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108452#M22540</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Would ANYALPHA help.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2818&amp;nbsp; data _null_;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2819&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; a='0áñéúáóíÁÑçâèôïéêëààñÉ';&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2820&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; b=anyalpha(a);&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2821&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; put _all_;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2822&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt; &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;a=0áñéúáóíÁÑçâèôïéêëààñÉ b=2 _ERROR_=0 _N_=1&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 08 May 2012 23:34:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108452#M22540</guid>
      <dc:creator>data_null__</dc:creator>
      <dc:date>2012-05-08T23:34:03Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108453#M22541</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I am not sure from the way you asked your question, but I think you want to remove all non-alpha characters from a string that contains both English and non-English (such as French, Spanish, or German, i.e. non-DBCS languages).&amp;nbsp; \p is not a valid metacharacter for SAS, even though it is for Perl.&amp;nbsp; There is nothing directly equivalent that comes to mind; however, I believe [[:alpha:]] will work for your needs.&amp;nbsp; This could also be accomplished using COMPRESS.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;13&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; data _null_;&lt;/P&gt;&lt;P&gt;14&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; a='0áñéúáóí-ÁÑçâè _ôïéêëààñÉ';&lt;/P&gt;&lt;P&gt;15&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; b=prxchange('s/[[:^alpha:]]//o',-1,a);&lt;/P&gt;&lt;P&gt;16&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; c=compress(a,,'ka');&lt;/P&gt;&lt;P&gt;17&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; put (a--c) (=/);&lt;/P&gt;&lt;P&gt;18&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;a=0áñéúáóí-ÁÑçâè _ôïéêëààñÉ&lt;/P&gt;&lt;P&gt;b=áñéúáóíÁÑçâèôïéêëààñÉ&lt;/P&gt;&lt;P&gt;c=áñéúáóíÁÑçâèôïéêëààñÉ&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you do care about DBCS languages, look into KCOMPRESS.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 09 May 2012 00:01:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108453#M22541</guid>
      <dc:creator>FriedEgg</dc:creator>
      <dc:date>2012-05-09T00:01:11Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108454#M22542</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I would use KSUBSTR() to pull out a single character at a time and compare it with the values you don't want. Some dummy code like:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;i=1;&lt;/P&gt;&lt;P&gt;_temp=ksubstr(name,i,1);&lt;/P&gt;&lt;P&gt;do while(not missing(_temp));&lt;/P&gt;&lt;P&gt;if&amp;nbsp; _temp&amp;nbsp; not in ('_' '0' '1' 'a' 'b') then put 'Found:'&amp;nbsp; _temp;&lt;/P&gt;&lt;P&gt;i+1;&lt;/P&gt;&lt;P&gt;_temp=ksubstr(name,i,1);&lt;/P&gt;&lt;P&gt;end;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;But I think this is just the beginning. This problem has been very annoying for me for a long time.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 09 May 2012 08:12:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108454#M22542</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-05-09T08:12:28Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108455#M22543</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thank you for the reply, data_null__.&amp;nbsp; ANYALPHA is interesting but not helpful.&amp;nbsp; To be more specific, I want to make sure a person's name (as it is recorded) contains only valid characters, which are alphabetic characters (including international characters), space, period, and hyphen.&amp;nbsp; For example:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Good: Martha Jones-Smith&lt;/P&gt;&lt;P&gt;Invalid: Martha Jones=Smith&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Good: Robert Smith Jr.&lt;/P&gt;&lt;P&gt;Invalid: Robert Smith Jr. (Bob)&lt;/P&gt;&lt;P&gt;Invalid: Robert Smith Jr,&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 09 May 2012 15:28:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108455#M22543</guid>
      <dc:creator>AndrewZ</dc:creator>
      <dc:date>2012-05-09T15:28:39Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108456#M22544</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thank you for the replies, FriedEgg and Ksharp, but I don't want to change any characters.&amp;nbsp; I'm validating that a person's name contains only valid characters (explained more in post #4).&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 09 May 2012 15:31:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108456#M22544</guid>
      <dc:creator>AndrewZ</dc:creator>
      <dc:date>2012-05-09T15:31:51Z</dc:date>
    </item>
    <item>
      <title>Re: Regular expression for Unicode letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108457#M22545</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2965&amp;nbsp; data _null_;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2966&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; infile cards dsd dlm=':';&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2967&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; input x $ name:$26.;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2968&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; valid = not lengthn(compress(name,' .-','A'));&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2969&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; put (_all_)(=);&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;2970&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cards;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt; &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Good name=Martha Jones-Smith valid=1&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Invalid name=Martha Jones=Smith valid=0&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Good name=Robert Smith Jr. valid=1&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Invalid name=Robert Smith Jr. 
(Bob) valid=0&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Invalid name=Robert Smith Jr, valid=0&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Invalid name=0áñéúáóíÁÑçâèôïéêëààñÉ valid=0&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-size: 8pt; font-family: 'SAS Monospace';"&gt;x=Good name=áñéúáóíÁÑçâèôïéêëààñÉ valid=1&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 09 May 2012 16:12:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Regular-expression-for-Unicode-letters/m-p/108457#M22545</guid>
      <dc:creator>data_null__</dc:creator>
      <dc:date>2012-05-09T16:12:10Z</dc:date>
    </item>
  </channel>
</rss>

