<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Extract Values using PERL Regular Expression in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/391650#M94122</link>
    <description>&lt;PRE&gt;I would like to extract serum creatinine values from these strings. My regular expression does not work. How can I fix it?&lt;BR /&gt;&lt;BR /&gt;data _NULL_;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str1 = 'Serum creatinine: 3160';  * WANT: 3160;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str2 = 'Serum creatinine is 3160';  * WANT: 3160;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str3 = '72(ref range 44-106)';  *WANT: 72 and 44-106;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str4 = '133 H umol/l (49-93)'; *WANT: 133 and 49-93;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str5 = '80(Ref. Int. 52-112 umol/L)'; *WANT: 80 and 52-112;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str6 = 'TEST                     RESULT\.br\COLLECTION DATE 6-FEB-2014\.br\24 HOUR URINE VOLUME 0.100\.br\\.br\SERUM CREATININE LEVEL   511      HI 64 - 110'; *WANT: 511 and 64-110;

&amp;nbsp;&amp;nbsp;&amp;nbsp;reg_ex = prxparse("/((?:.*?)(?=CREATININE)?(?:.*?))\s*(\d+)\s*(\d+\s*-\s*\d+)?/oi"); 

&amp;nbsp;&amp;nbsp;&amp;nbsp;reg_match = prxmatch(reg_ex, str2); 
&amp;nbsp;&amp;nbsp;&amp;nbsp;
&amp;nbsp;&amp;nbsp;&amp;nbsp;paren = prxparen(reg_ex);
&amp;nbsp;&amp;nbsp;&amp;nbsp;put paren=;

&amp;nbsp;run;&lt;/PRE&gt;</description>
    <pubDate>Tue, 29 Aug 2017 19:38:04 GMT</pubDate>
    <dc:creator>verdantsphinx</dc:creator>
    <dc:date>2017-08-29T19:38:04Z</dc:date>
    <item>
      <title>Extract Values using PERL Regular Expression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/391650#M94122</link>
      <description>&lt;PRE&gt;I would like to extract serum creatinine values from these strings. My regular expression does not work. How can I fix it?&lt;BR /&gt;&lt;BR /&gt;data _NULL_;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str1 = 'Serum creatinine: 3160';  * WANT: 3160;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str2 = 'Serum creatinine is 3160';  * WANT: 3160;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str3 = '72(ref range 44-106)';  *WANT: 72 and 44-106;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str4 = '133 H umol/l (49-93)'; *WANT: 133 and 49-93;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str5 = '80(Ref. Int. 52-112 umol/L)'; *WANT: 80 and 52-112;
&amp;nbsp;&amp;nbsp;&amp;nbsp;str6 = 'TEST                     RESULT\.br\COLLECTION DATE 6-FEB-2014\.br\24 HOUR URINE VOLUME 0.100\.br\\.br\SERUM CREATININE LEVEL   511      HI 64 - 110'; *WANT: 511 and 64-110;

&amp;nbsp;&amp;nbsp;&amp;nbsp;reg_ex = prxparse("/((?:.*?)(?=CREATININE)?(?:.*?))\s*(\d+)\s*(\d+\s*-\s*\d+)?/oi"); 

&amp;nbsp;&amp;nbsp;&amp;nbsp;reg_match = prxmatch(reg_ex, str2); 
&amp;nbsp;&amp;nbsp;&amp;nbsp;
&amp;nbsp;&amp;nbsp;&amp;nbsp;paren = prxparen(reg_ex);
&amp;nbsp;&amp;nbsp;&amp;nbsp;put paren=;

&amp;nbsp;run;&lt;/PRE&gt;</description>
      <pubDate>Tue, 29 Aug 2017 19:38:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/391650#M94122</guid>
      <dc:creator>verdantsphinx</dc:creator>
      <dc:date>2017-08-29T19:38:04Z</dc:date>
    </item>
    <item>
      <title>Re: Extract Values using PERL Regular Expression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/391695#M94141</link>
      <description>&lt;P&gt;I am not sure if this is possible to accomplish with a single regex in SAS, since you need something like &lt;A href="http://www.rexegg.com/regex-conditionals.html" target="_self"&gt;conditional regular expression&lt;/A&gt; which is not supported in SAS perl regex (actually the /o flag for optimization is also not supported in SAS). So it is easier to split the task into 2 parts.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data a;
	array str[*] $200 str1-str6;
	str1 = 'Serum creatinine: 3160';  * WANT: 3160;
	str2 = 'Serum creatinine is 3160';  * WANT: 3160;
	str3 = '72(ref range 44-106)';  *WANT: 72 and 44-106;
	str4 = '133 H umol/l (49-93)'; *WANT: 133 and 49-93;
	str5 = '80(Ref. Int. 52-112 umol/L)'; *WANT: 80 and 52-112;
	str6 = 'TEST                     RESULT\.br\COLLECTION DATE 6-FEB-2014\.br\24 HOUR URINE VOLUME 0.100\.br\\.br\SERUM CREATININE LEVEL   511      HI 64 - 110'; *WANT: 511 and 64-110;

	regex1 = prxparse('/(creatinine).*?(\d+)\D*(\d+\s*-\s*\d+)?/i');
	regex2 = prxparse('/(\d+)\D*(\d+\s*-\s*\d+)?/');

	length match $200;

	do i = 1 to 6;
		if prxmatch(regex1, str[i]) then do;
			match = catx(' and ', prxposn(regex1, 2, str[i]), prxposn(regex1, 3, str[i]));
			put 'Found match with creatinine word: ' match;
		end;
		else if prxmatch(regex2, str[i]) then do;
			match = catx(' and ', prxposn(regex2, 1, str[i]), prxposn(regex2, 2, str[i]));
			put 'Found match without creatinine word: ' match;
		end;
		output;
	end;
 run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This works for your examples. See log and dataset for details.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also, I suggest you use something like regex101.com for composing regular expressions before using them in SAS, this will just save you time, since you will see results immediately - for example this is for your case&amp;nbsp;&lt;A href="https://regex101.com/r/WIVWa8/1" target="_blank"&gt;https://regex101.com/r/WIVWa8/1&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Aug 2017 23:53:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/391695#M94141</guid>
      <dc:creator>sspkmnd</dc:creator>
      <dc:date>2017-08-29T23:53:05Z</dc:date>
    </item>
    <item>
      <title>Re: Extract Values using PERL Regular Expression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/392461#M94439</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;first step, create a vertical structure of the strings&lt;/P&gt;&lt;PRE&gt;data aatest;
   length str1 $200;
   str1 = 'Serum creatinine: 3160';      *WANT: 3160;output;
   str1 = 'Serum creatinine is 3160';    *WANT: 3160;output;
   str1 = '72(ref range 44-106)';        *WANT: 72 and 44-106;output;
   str1 = '133 H umol/l (49-93)';        *WANT: 133 and 49-93;output;
   str1 = '80(Ref. Int. 52-112 umol/L)'; *WANT: 80 and 52-112;output;
   str1 = 'TEST                     RESULT\.br\COLLECTION DATE 6-FEB-2014\.br\24 HOUR URINE VOLUME 0.100\.br\\.br\SERUM CREATININE LEVEL   511      HI 64 - 110'; *WANT: 511 and 64-110;output;
run;&lt;/PRE&gt;&lt;P&gt;second, standardize the chaos &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;PRE&gt;data aaatest;
   length out $200;
   set aatest;

   regId=prxparse('s/^(?:.*creatinine\s*.[^\d]*\s*)?(\d+).[^\d]*((\d+)\s*(-)\s*(\d+))?.*/$1 $2/i');
   match=prxmatch(regId,str1);
   out=prxchange(regId,-1,str1);
   value=scan(out,1);
   range=compress(substr(out,find(out,' ')));
   if not match then put 'E' 'RROR: unrecognized string pattern, please check' str1=;
run;&lt;/PRE&gt;&lt;P&gt;tadaaa!&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="regexResult.png" style="width: 600px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/14780i105DE384CBBAC8F6/image-size/large?v=v2&amp;amp;px=999" role="button" title="regexResult.png" alt="regexResult.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 01 Sep 2017 09:27:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-Values-using-PERL-Regular-Expression/m-p/392461#M94439</guid>
      <dc:creator>Oligolas</dc:creator>
      <dc:date>2017-09-01T09:27:40Z</dc:date>
    </item>
  </channel>
</rss>

