<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extracting a substring by using regular expressions in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327605#M271683</link>
    <description>&lt;P&gt;Here is another option where you first remove any characters before the first and after the last single quote from the input buffer.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
  infile datalines truncover dlm=',' ;
  input @;
  /* remove all characters from input buffer before the first and after the last quote */  
  _infile_=prxchange("s/^[^']*('.+')[^']*$/\1/o",1,_infile_);

  /* map variable against modified input buffer */
  source_rec_no=_n_;
  do while(1);
    input myvar :$20. @;
    if missing(myvar) then leave;
    myvar=dequote(myvar);
    output;
  end;

datalines;
DX(i)('410','412')CC_GRP_1
DX(i)('39891','40201','40211','40291','40401','40403','40411','40413','40491','40493',
'4254','4255','4257','4258','4259','428')CC_GRP_2
DX(i)('0930','4373','440','441','4431','4432','4438','4439','4471','5571','5579','V434')
DX(i)('36234','430','431','432','433','434','435','436','437','438')CC_GRP_4
;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Thu, 26 Jan 2017 01:23:46 GMT</pubDate>
    <dc:creator>Patrick</dc:creator>
    <dc:date>2017-01-26T01:23:46Z</dc:date>
    <item>
      <title>Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327554#M271678</link>
      <description>&lt;P&gt;Hi Everyone,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I would like to extract a list of codes from a particular dataset.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried to use regular expressions but unfortunately didn't manage to extract&amp;nbsp;the whole list of codes but only the first occurrence of the pattern. The codes are quoted, consist of 3 to 5 digits, start with V or E or numbers {0-9} and continue with numbers.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;U&gt;&lt;STRONG&gt;What I have:&lt;/STRONG&gt;&lt;/U&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;data test;&lt;BR /&gt;length codelist $120;&lt;BR /&gt;input codelist $;&lt;BR /&gt;datalines;&lt;BR /&gt;DX(i)('410','412')CC_GRP_1&lt;BR /&gt;DX(i)('39891','40201','40211','40291','40401','40403','40411','40413','40491','40493',&lt;BR /&gt;'4254','4255','4257','4258','4259','428')CC_GRP_2&lt;BR /&gt;DX(i)('0930','4373','440','441','4431','4432','4438','4439','4471','5571','5579','V434')&lt;BR /&gt;DX(i)('36234','430','431','432','433','434','435','436','437','438')CC_GRP_4&lt;BR /&gt;;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;data test2;&lt;BR /&gt;set test;&lt;BR /&gt;retain pattern_num ;&lt;BR /&gt;if _n_ = 1 then pattern_num = prxparse("/('E|V|\d\d*')/");&lt;BR /&gt;call prxsubstr(pattern_num,strip(codelist),start, lenght);&lt;BR /&gt;run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;U&gt;&lt;STRONG&gt;Desired output:&lt;/STRONG&gt;&lt;/U&gt;&lt;/P&gt;&lt;P&gt;'410','412'&lt;BR /&gt;'39891','40201','40211','40291','40401','40403','40411','40413','40491','40493'&lt;BR /&gt;'4254','4255','4257','4258','4259','428'&lt;BR /&gt;'0930','4373','440','441','4431','4432','4438','4439','4471','5571','5579','V434'&lt;BR /&gt;'36234','430','431','432','433','434','435','436','437','438'&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
      <pubDate>Wed, 25 Jan 2017 21:54:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327554#M271678</guid>
      <dc:creator>skyvalley81</dc:creator>
      <dc:date>2017-01-25T21:54:45Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327559#M271679</link>
      <description>&lt;P&gt;Is your actual data for the second and third observations two separate observations or should that actually be one row of data in the original source?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If there wasn't a hard break there this would work:&lt;/P&gt;
&lt;PRE&gt;data test;
length codelist pattern $220;
input codelist $;
pattern= scan(codelist,3,'() ');
datalines;
DX(i)('410','412')CC_GRP_1
DX(i)('39891','40201','40211','40291','40401','40403','40411','40413','40491','40493','4254','4255','4257','4258','4259','428')CC_GRP_2
DX(i)('0930','4373','440','441','4431','4432','4438','4439','4471','5571','5579','V434')
DX(i)('36234','430','431','432','433','434','435','436','437','438')CC_GRP_4
;
run;&lt;/PRE&gt;
&lt;P&gt;It really looks like your list should start with "DX(i) ( " and end with ") CC_GRP" but if the pattern isn't consistent the regular expressions may become somewhat less regular...&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 25 Jan 2017 22:07:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327559#M271679</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2017-01-25T22:07:02Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327565#M271680</link>
      <description>&lt;P&gt;Hi ballardw,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you very much for your prompt reply.&lt;/P&gt;&lt;P&gt;Each line is a separate observation.&lt;/P&gt;&lt;P&gt;Your idea is really good but because there is no consistent pattern the scan function might not work. For example, there are cases where there is no parenthesis at the end of the string. With this regex I can find the position of the first match of the pattern. However, this does not suffice to extract the whole list. I was wondering if there is any way to find the last occurrence of the pattern; then I could easily extract the substring.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks again&lt;/P&gt;</description>
      <pubDate>Wed, 25 Jan 2017 22:30:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327565#M271680</guid>
      <dc:creator>skyvalley81</dc:creator>
      <dc:date>2017-01-25T22:30:21Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327581#M271681</link>
      <description>&lt;P&gt;Here is how you could get the codes (this goes a step further than just getting the code strings)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
if not prx1 then prx1 + prxparse("/'[EV0-9]\d{2,4}'/");
if not prx2 then prx2 + prxparse("/[^',)]+$/");
set test;
length line 8 trailler $12 code $8;
line = _n_;
if prxmatch(prx2, codelist) then trailler = prxPosn(prx2, 0, codelist);
start = 1;
stop = length(codelist);
call prxNext(prx1, start, stop, codelist, pos, len);
  do while (pos &amp;gt; 0);
     code = dequote(substr(codelist, pos, len));
     output;
     call prxNext(prx1, start, stop, codelist, pos, len);
  end;
keep line trailler code;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 25 Jan 2017 23:12:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327581#M271681</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-01-25T23:12:00Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327600#M271682</link>
      <description>&lt;P&gt;Hi PGStats,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you very much for your solution; the code is exactly what I was looking for &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Could I&amp;nbsp;ask you to explain the second prxparse regex to me, if possible?&lt;/P&gt;&lt;PRE class=" language-sas"&gt;&lt;CODE class="  language-sas"&gt;&lt;SPAN class="token function"&gt;prxparse&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;(&lt;/SPAN&gt;&lt;SPAN class="token string"&gt;"/[^',)]+$/"&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;)&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 26 Jan 2017 00:48:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327600#M271682</guid>
      <dc:creator>skyvalley81</dc:creator>
      <dc:date>2017-01-26T00:48:03Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327605#M271683</link>
      <description>&lt;P&gt;Here is another option where you first remove any characters before the first and after the last single quote from the input buffer.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data test;
  infile datalines truncover dlm=',' ;
  input @;
  /* remove all characters from input buffer before the first and after the last quote */  
  _infile_=prxchange("s/^[^']*('.+')[^']*$/\1/o",1,_infile_);

  /* map variable against modified input buffer */
  source_rec_no=_n_;
  do while(1);
    input myvar :$20. @;
    if missing(myvar) then leave;
    myvar=dequote(myvar);
    output;
  end;

datalines;
DX(i)('410','412')CC_GRP_1
DX(i)('39891','40201','40211','40291','40401','40403','40411','40413','40491','40493',
'4254','4255','4257','4258','4259','428')CC_GRP_2
DX(i)('0930','4373','440','441','4431','4432','4438','4439','4471','5571','5579','V434')
DX(i)('36234','430','431','432','433','434','435','436','437','438')CC_GRP_4
;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 26 Jan 2017 01:23:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327605#M271683</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2017-01-26T01:23:46Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327607#M271684</link>
      <description>Hi Patrick,&lt;BR /&gt;&lt;BR /&gt;Thank you so much, your approach is also very helpful</description>
      <pubDate>Thu, 26 Jan 2017 01:37:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327607#M271684</guid>
      <dc:creator>skyvalley81</dc:creator>
      <dc:date>2017-01-26T01:37:37Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting a substring by using regular expressions</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327635#M271685</link>
      <description>&lt;PRE class="  language-sas"&gt;&lt;CODE class="  language-sas"&gt;&lt;SPAN class="token string"&gt;"/[^',)]+$/"&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;matches any character except a single quote, a comma, or a closing parenthesis, repeated one or more times until the end of the string.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 26 Jan 2017 04:09:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-a-substring-by-using-regular-expressions/m-p/327635#M271685</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-01-26T04:09:42Z</dc:date>
    </item>
  </channel>
</rss>

