<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extract text using Prxsubstr in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958785#M374183</link>
    <description>&lt;P&gt;You can avoid needing to make the macro variable with the number of patterns.&amp;nbsp; Just make the array large enough for the maximum number of patterns&amp;nbsp;&lt;SPAN&gt;you ever expect to have to handle.&amp;nbsp; &lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;This example is set to handle 9,999 patterns.&amp;nbsp; But even 99,999 or more should not cause any trouble. Just make sure to adjust the array size and the length of the MATCHTYPE variable.&amp;nbsp; (Or just keep the numeric loop counter variable instead.)&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set have;
* Create variable match with same length as variable street ;
  if 0 then match=street;
* Set length of MATCHTYPE long enough for up to 9999 patterns ;
  length matchtype $11;
* Make array large enough for 9999 patterns ;
  array expr_id [9999] _temporary_;
  if _n_=1 then do pattern=1 to nobs;
* Parse regex patterns into array ;
    set patterns nobs=nobs;
    expr_id[pattern]=prxparse(strip(regex));
  end;
* Output any matches  ;
  do pattern=1 to nobs;
    call prxsubstr(expr_id[pattern], street, position, length);
    if position&amp;gt; 0 then do;
      match=substr(street, position, length);
      matchtype=cats('pattern', pattern);
      output;
    end;
  end;
  drop regex pattern;
run;&lt;/CODE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 10 Feb 2025 04:18:41 GMT</pubDate>
    <dc:creator>Tom</dc:creator>
    <dc:date>2025-02-10T04:18:41Z</dc:date>
    <item>
      <title>Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958778#M374177</link>
      <description>&lt;P&gt;I want to extract text from a string using multiple patterns. I am getting the error "&lt;SPAN&gt;The PRXPARSE function call does not have enough arguments.".&lt;/SPAN&gt;&lt;/P&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;data have;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;
&lt;DIV&gt;&amp;nbsp; input street $80.;&lt;/DIV&gt;
&lt;DIV&gt;datalines;&lt;/DIV&gt;
&lt;DIV&gt;Bldg A 153 First Street&lt;/DIV&gt;
&lt;DIV&gt;6789 64th Ave&lt;/DIV&gt;
&lt;DIV&gt;4 Moritz Road&lt;/DIV&gt;
&lt;DIV&gt;7493 Wilkes Place&lt;/DIV&gt;
&lt;DIV&gt;;&lt;/DIV&gt;
&lt;DIV&gt;run;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;%macro test;&lt;/DIV&gt;
&lt;DIV&gt;%global p1 p2 p3 p4&amp;nbsp; ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;data want;&lt;/DIV&gt;
&lt;DIV&gt;set have;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;pattern1 = "m/\d+\s[a-z]+\s[a-z]+/oi";&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; &amp;nbsp;pattern2 = "m/Pl|place/i";&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; pattern3 = "m/rd|road/i";&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; pattern4 = "m/ave|avenue/i";&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;%do i=1 %to 4;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; call symputx(cats('p',&amp;amp;i), cats('pattern',&amp;amp;i.) , 'g');&lt;/DIV&gt;
&lt;DIV&gt;%end;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;%do j=1 %to 4;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; ExpressionID = prxparse(&amp;amp;&amp;amp;p&amp;amp;j);&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;call prxsubstr(ExpressionID, street, position, length);&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;%if length&amp;gt; 0 %then&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;%do;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; match = substr(street, position, length);&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; matchtype=cats('pattern',&amp;amp;j);&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; output;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;%end;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;%end;&lt;/DIV&gt;
&lt;DIV&gt;drop Pattern:;&lt;/DIV&gt;
&lt;DIV&gt;run;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;%mend;&lt;/DIV&gt;
&lt;DIV&gt;%test;&lt;/DIV&gt;
&lt;/DIV&gt;</description>
      <pubDate>Sun, 09 Feb 2025 22:29:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958778#M374177</guid>
      <dc:creator>SK_11</dc:creator>
      <dc:date>2025-02-09T22:29:19Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958779#M374178</link>
      <description>&lt;P&gt;Don't use macro language if not necessary. It only makes debugging harder.&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  input street $80.;
  datalines;
Bldg A 153 First Street
6789 64th Ave
4 Moritz Road
7493 Wilkes Place
;
run;

data want;
  set have;
  if _n_=1 then
    do;
      pattern1="m/\d+\s[a-z]+\s[a-z]+/i";
      pattern2="m/Pl|place/i";
      pattern3="m/rd|road/i";
      pattern4="m/ave|avenue/i";
      array patterns{4} pattern1 - pattern4;
      array expr_id {4} _temporary_;
      do i=1 to dim(patterns);
        expr_id[i]=prxparse(patterns[i]);
      end;
      length matchtype $8;
      /* create variable match with same length as variable street */
      if 0 then match=street;
    end;

  do i=1 to dim(patterns);
    call prxsubstr(expr_id[i], street, position, length);
    if position&amp;gt; 0 then 
      do;
        match=substr(street, position, length);
        matchtype=cats('pattern', i);
        output;
      end;
  end;
  drop Pattern: i;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;Or even shorter:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set have;
  if _n_=1 then
    do;
      array expr_id {4} _temporary_;
      expr_id[1]=prxparse("m/\d+\s[a-z]+\s[a-z]+/i");
      expr_id[2]=prxparse("m/Pl|place/i");
      expr_id[3]=prxparse("m/rd|road/i");
      expr_id[4]=prxparse("m/ave|avenue/i");
      /* create variable match with same length as variable street */
      if 0 then match=street;
      length matchtype $8;
    end;

  do i=1 to dim(expr_id);
    call prxsubstr(expr_id[i], street, position, length);
    if position&amp;gt; 0 then 
      do;
        match=substr(street, position, length);
        matchtype=cats('pattern', i);
        output;
      end;
  end;
  drop i;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 09 Feb 2025 23:51:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958779#M374178</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2025-02-09T23:51:03Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958780#M374179</link>
      <description>&lt;P&gt;Hi Patrick&lt;/P&gt;
&lt;P&gt;Thanks for your prompt reply. Is there any way you can separate the pattern and prxsubstr code into two data steps? I want to use the same patterns for multiple data sets. Thanks a lot&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 10 Feb 2025 00:02:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958780#M374179</guid>
      <dc:creator>SK_11</dc:creator>
      <dc:date>2025-02-10T00:02:36Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958781#M374180</link>
      <description>&lt;P&gt;Something like the code below should do.&amp;nbsp;&lt;BR /&gt;I've modified your RegEx by adding the word boundary metacharacter \b so your 2nd regex does not match Ma&lt;STRONG&gt;pl&lt;/STRONG&gt;e Street&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  input street $80.;
  datalines;
Bldg A 153 First Street
6789 64th Ave
4 Moritz Road
7493 Wilkes Place
711 Maple Street
;
run;

data patterns;
  input regex :$100.;
  datalines;
m/\d+\s[a-z]+\s[a-z]+/i
m/\b(Pl|place)\b/i
m/\b(rd|road)\b/i
m/\b(ave|avenue)\b/i
;
run;

data _null_;
  call symputx('n_patterns',nobs);
  stop;
  set patterns nobs=nobs;
run;

data want;
  set have;
  if _n_=1 then
    do;
      array expr_id {&amp;amp;n_patterns} _temporary_;
      do i=1 by 1 until(last);
        set patterns end=last;
        expr_id[i]=prxparse(strip(regex));
      end;
      /* create variable match with same length as variable street */
      if 0 then match=street;
      length matchtype $8;
    end;

  do i=1 to dim(expr_id);
    call prxsubstr(expr_id[i], street, position, length);
    if position&amp;gt; 0 then 
      do;
        match=substr(street, position, length);
        matchtype=cats('pattern', i);
        output;
      end;
  end;
  drop regex i;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 10 Feb 2025 00:50:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958781#M374180</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2025-02-10T00:50:31Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958782#M374181</link>
      <description>Thanks a lot</description>
      <pubDate>Mon, 10 Feb 2025 00:46:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958782#M374181</guid>
      <dc:creator>SK_11</dc:creator>
      <dc:date>2025-02-10T00:46:16Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958785#M374183</link>
      <description>&lt;P&gt;You can avoid needing to make the macro variable with the number of patterns.&amp;nbsp; Just make the array large enough for the maximum number of patterns&amp;nbsp;&lt;SPAN&gt;you ever expect to have to handle.&amp;nbsp; &lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;This example is set to handle 9,999 patterns.&amp;nbsp; But even 99,999 or more should not cause any trouble. Just make sure to adjust the array size and the length of the MATCHTYPE variable.&amp;nbsp; (Or just keep the numeric loop counter variable instead.)&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set have;
* Create variable match with same length as variable street ;
  if 0 then match=street;
* Set length of MATCHTYPE long enough for up to 9999 patterns ;
  length matchtype $11;
* Make array large enough for 9999 patterns ;
  array expr_id [9999] _temporary_;
  if _n_=1 then do pattern=1 to nobs;
* Parse regex patterns into array ;
    set patterns nobs=nobs;
    expr_id[pattern]=prxparse(strip(regex));
  end;
* Output any matches  ;
  do pattern=1 to nobs;
    call prxsubstr(expr_id[pattern], street, position, length);
    if position&amp;gt; 0 then do;
      match=substr(street, position, length);
      matchtype=cats('pattern', pattern);
      output;
    end;
  end;
  drop regex pattern;
run;&lt;/CODE&gt;&amp;nbsp;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 10 Feb 2025 04:18:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958785#M374183</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2025-02-10T04:18:41Z</dc:date>
    </item>
    <item>
      <title>Re: Extract text using Prxsubstr</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958786#M374184</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/159"&gt;@Tom&lt;/a&gt;&amp;nbsp;Sure, that will work as well, but I can't see the harm in an additional simple data _null_ step that won't iterate through the data.&amp;nbsp;&lt;/P&gt;
      <pubDate>Mon, 10 Feb 2025 04:22:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extract-text-using-Prxsubstr/m-p/958786#M374184</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2025-02-10T04:22:35Z</dc:date>
    </item>
  </channel>
</rss>

