<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extract multiple substrings from string using Regex in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644845#M22004</link>
    <description>&lt;P&gt;Just a follow-up question. In case there are duplicated matches per row, is it possible to adapt the code in a simple way so that it doesn't show those duplicates? Basically the matches inside the array should be unique. I guess there was a similar discussion already, but maybe you have different solutions.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://communities.sas.com/t5/SAS-Programming/Removing-repeated-substring-in-String-Observation-from-the/td-p/542382" target="_blank"&gt;https://communities.sas.com/t5/SAS-Programming/Removing-repeated-substring-in-String-Observation-from-the/td-p/542382&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://communities.sas.com/t5/General-SAS-Programming/remove-duplicate-numbers-in-a-string-of-numbers/td-p/126513" target="_blank"&gt;https://communities.sas.com/t5/General-SAS-Programming/remove-duplicate-numbers-in-a-string-of-numbers/td-p/126513&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
    <pubDate>Sun, 03 May 2020 14:54:19 GMT</pubDate>
    <dc:creator>Matos</dc:creator>
    <dc:date>2020-05-03T14:54:19Z</dc:date>
    <item>
      <title>Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644809#M21998</link>
      <description>&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;I am new with Regex and I would like to use it to extract from a string all substrings that match a specific pattern. The output should display, in a new column (called extraction), all matching substrings together separated by a space. Below you can find the initial dataset.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;

input text &amp;amp; $200.;

cards;

{Z(A_15.01.n)+{P(Z_18.10.c)999(Y_18.04.a)B(999)} + {T(X_20.99.d)}

DATA:Y(A_12.00)&amp;lt;=Z(Y_99.99)

A(W_99.99.a)

{?xxZ_99.00} {Z(A_15.07.n)}

;

run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The matching string could have the following pattern:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;- one uppercase letter, followed by underscore and two digits, a dot plus another two digits (ex. Z_99.00) or&lt;/P&gt;&lt;P&gt;- one uppercase letter, followed by underscore and two digits, a dot plus another two digits plus another dot and letter (ex: A_15.00.n)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Desired result:&lt;/P&gt;&lt;DIV class="mceNonEditable lia-copypaste-placeholder"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;text&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Extraction&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;{Z(A_15.01.n)+{P(Z_18.10.c)999(Y_18.04.a)B(999)} + {T(X_20.99.d)}&lt;/TD&gt;&lt;TD&gt;A_15.01.n Z_18.10.c Y_18.04.a X_20.99.d&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;DATA:Y(A_12.00)&amp;lt;=Z(Y_99.99)&lt;/TD&gt;&lt;TD&gt;A_12.00 Y_99.99&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;A(W_99.99.a)&lt;/TD&gt;&lt;TD&gt;W_99.99.a&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;{?xxZ_99.00} {Z(A_15.07.n)}&lt;/TD&gt;&lt;TD&gt;Z_99.00 A_15.07.n&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;DIV class="mceNonEditable lia-copypaste-placeholder"&gt;&amp;nbsp;&lt;/DIV&gt;&lt;P&gt;Any help on this topic appreciated.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Many thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 03 May 2020 10:02:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644809#M21998</guid>
      <dc:creator>Matos</dc:creator>
      <dc:date>2020-05-03T10:02:12Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644811#M21999</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/247128"&gt;@Matos&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Here is a way to achieve this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	input text &amp;amp; $200.;
	cards;
{Z(A_15.01.n)+{P(Z_18.10.c)999(Y_18.04.a)B(999)} + {T(X_20.99.d)}
DATA:Y(A_12.00)&amp;lt;=Z(Y_99.99)
A(W_99.99.a)
{?xxZ_99.00} {Z(A_15.07.n)}
;
run;

data want;
	set have;
	length extraction $ 200;
	array _extraction (200) $ 200;
	expressionID = prxparse('/[A-Z]_\d{2}\.\d{2}(\.[a-z])?/');
	start = 1;
	stop = length(text);
	
	call prxnext(ExpressionID, start, stop, text, position, length);
      do while (position &amp;gt; 0);
         _extraction(position) = substr(text, position, length);
         call prxnext(ExpressionID, start, stop, text, position, length);
      end;	
     
    extraction = catx(" ",of _extraction(*));
      
	drop _: expressionID start	stop position length ;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Output:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Capture d’écran 2020-05-03 à 12.31.12.png" style="width: 566px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39009i89ADCADC5024075D/image-size/large?v=v2&amp;amp;px=999" role="button" title="Capture d’écran 2020-05-03 à 12.31.12.png" alt="Capture d’écran 2020-05-03 à 12.31.12.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You can find interesting documentation here:&lt;/P&gt;
&lt;P&gt;- CALL PRXNEXT Routine&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;A href="https://documentation.sas.com/?docsetId=lefunctionsref&amp;amp;docsetTarget=n1obc9u7z3225mn1npwnassehff0.htm&amp;amp;docsetVersion=9.4&amp;amp;locale=en#n0cuqlohpjdn1bn1t25xqxxz871t"&gt;https://documentation.sas.com/?docsetId=lefunctionsref&amp;amp;docsetTarget=n1obc9u7z3225mn1npwnassehff0.htm&amp;amp;docsetVersion=9.4&amp;amp;locale=en#n0cuqlohpjdn1bn1t25xqxxz871t&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;- SAS REGEX TIP SHEET&amp;nbsp;&lt;A href="https://support.sas.com/rnd/base/datastep/perl_regexp/regexp-tip-sheet.pdf" target="_blank" rel="noopener"&gt;https://support.sas.com/rnd/base/datastep/perl_regexp/regexp-tip-sheet.pdf&lt;/A&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this helps!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;All the best,&lt;/P&gt;</description>
      <pubDate>Sun, 03 May 2020 10:34:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644811#M21999</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-03T10:34:35Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644812#M22000</link>
      <description>Thank you very much, also for the documentation! Really useful.</description>
      <pubDate>Sun, 03 May 2020 10:46:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644812#M22000</guid>
      <dc:creator>Matos</dc:creator>
      <dc:date>2020-05-03T10:46:27Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644814#M22001</link>
      <description>&lt;P&gt;please try the below code&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input text &amp;amp; :$200.;
cards;
{Z(A_15.01.n)+{P(Z_18.10.c)999(Y_18.04.a)B(999)} + {T(X_20.99.d)}
DATA:Y(A_12.00)&amp;lt;=Z(Y_99.99)
A(W_99.99.a)
{?xxZ_99.00} {Z(A_15.07.n)}
;
run;

data want;  
set have;   
start=1;   
stop=length(text);   
pattern_id=prxparse('/(\w\_\d{2,2}\.\d{2,2}\.\w)|(\w\_\d{2,2}\.\d{2,2})/');   
call prxnext(pattern_id,start,stop,text,position,length);    
do while(position&amp;gt;0);    
new_text=substr(text,position,length);
call prxnext(pattern_id,start,stop,text,position,length);  
output; 
end; 
run;

data want2;
set want;
by text notsorted;
retain new_text2;
if first.text then new_text2=new_text;
else new_text2=catx(' ',new_text2,new_text);
if last.text;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 03 May 2020 11:07:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644814#M22001</guid>
      <dc:creator>Jagadishkatam</dc:creator>
      <dc:date>2020-05-03T11:07:38Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644819#M22002</link>
      <description>You're welcome &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/247128"&gt;@Matos&lt;/a&gt; !</description>
      <pubDate>Sun, 03 May 2020 11:22:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644819#M22002</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-03T11:22:15Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644822#M22003</link>
      <description>Amazing Jag! That's also a nice way to achieve it.</description>
      <pubDate>Sun, 03 May 2020 11:50:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644822#M22003</guid>
      <dc:creator>Matos</dc:creator>
      <dc:date>2020-05-03T11:50:12Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644845#M22004</link>
      <description>&lt;P&gt;Just a follow-up question. In case there are duplicated matches per row, is it possible to adapt the code in a simple way so that it doesn't show those duplicates? Basically the matches inside the array should be unique. I guess there was a similar discussion already, but maybe you have different solutions.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://communities.sas.com/t5/SAS-Programming/Removing-repeated-substring-in-String-Observation-from-the/td-p/542382" target="_blank"&gt;https://communities.sas.com/t5/SAS-Programming/Removing-repeated-substring-in-String-Observation-from-the/td-p/542382&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://communities.sas.com/t5/General-SAS-Programming/remove-duplicate-numbers-in-a-string-of-numbers/td-p/126513" target="_blank"&gt;https://communities.sas.com/t5/General-SAS-Programming/remove-duplicate-numbers-in-a-string-of-numbers/td-p/126513&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Sun, 03 May 2020 14:54:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644845#M22004</guid>
      <dc:creator>Matos</dc:creator>
      <dc:date>2020-05-03T14:54:19Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644853#M22006</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/247128"&gt;@Matos&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have modified the code as follows so as to suppress the array and add a proc transpose after managing duplicates. Does that answer your question?&lt;/P&gt;
&lt;P&gt;Best,&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	input text &amp;amp; $200.;
	cards;
{Z(A_15.01.n)+{P(Z_18.10.c)999(Y_18.04.a)B(999)} + {T(X_20.99.d)}
DATA:Y(A_12.00)&amp;lt;=Z(Y_99.99) DATA:Y(A_12.00)
A(W_99.99.a) A(W_99.99.a)
{?xxZ_99.00} {Z(A_15.07.n)}
;
run;

data have2;
	set have;
	length extraction $ 200;
	expressionID = prxparse('/[A-Z]_\d{2}\.\d{2}(\.[a-z])?/');
	start = 1;
	stop = length(text);
	
	call prxnext(ExpressionID, start, stop, text, position, length);
      do while (position &amp;gt; 0);
         extraction = strip(substr(text, position, length));
         call prxnext(ExpressionID, start, stop, text, position, length);
         output;
      end;	
      
	drop expressionID start	stop position length ;
run;

/* With "duplicates" */

proc sort data=have2 ;
	by text extract:;
run;

proc transpose data=have2 out=have_tr (drop=_:) prefix=extract;
	var extraction;
	by text notsorted;
run;

data want1;
	set have_tr;
	length extraction $ 200;
	extraction = catx(" ", of extract:);
run;

proc print;
	title "With potential duplicate matches";
run;

/* Without "duplicates" */

proc sort data=have2 nodupkey;
	by text extract:;
run;

proc transpose data=have2 out=have_tr (drop=_:) prefix=extract;
	var extraction;
	by text notsorted;
run;

data want2;
	set have_tr;
	length extraction $ 200;
	extraction = catx(" ", of extract:);
run;

proc print data=want2;
	title "Without duplicate matches";
run;

title;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-left" image-alt="Capture d’écran 2020-05-03 à 18.00.00.png" style="width: 822px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/39011i4B0C407626A97E7C/image-size/large?v=v2&amp;amp;px=999" role="button" title="Capture d’écran 2020-05-03 à 18.00.00.png" alt="Capture d’écran 2020-05-03 à 18.00.00.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 03 May 2020 16:03:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644853#M22006</guid>
      <dc:creator>ed_sas_member</dc:creator>
      <dc:date>2020-05-03T16:03:04Z</dc:date>
    </item>
    <item>
      <title>Re: Extract multiple substrings from string using Regex</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644857#M22007</link>
      <description>&lt;P&gt;Thanks Ed! You offered several interesting options, very useful. We could also take have2 (first time it was generated) and apply proc sql distinct to exclude duplicates.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;create table want2&amp;nbsp;
SELECT DISTINCT text, extraction
from have2;
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 03 May 2020 16:20:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Extract-multiple-substrings-from-string-using-Regex/m-p/644857#M22007</guid>
      <dc:creator>Matos</dc:creator>
      <dc:date>2020-05-03T16:20:15Z</dc:date>
    </item>
  </channel>
</rss>

