<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: extracting words from a text field based on their first 2 letters in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672367#M202059</link>
    <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp; How would you amend your RegEx to also capture SM888 from the data below?&lt;/P&gt;
&lt;PRE&gt;SM780,SM888 occurred at 1:00pm and SM999&lt;/PRE&gt;</description>
    <pubDate>Sun, 26 Jul 2020 03:24:18 GMT</pubDate>
    <dc:creator>Patrick</dc:creator>
    <dc:date>2020-07-26T03:24:18Z</dc:date>
    <item>
      <title>extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672249#M202000</link>
      <description>&lt;P&gt;How would I go about extracting all words from a text field that start with "SM" where the 2 letters "SM" are uppercase? All words that begin with "SM" would be copied into a new column where each word that begins with "SM" for that specific record would be separated by a space or comma.&lt;/P&gt;&lt;P&gt;For example, for record 100 the text is "SM690 and SM780 occurred at 1:00pm". The newly created column would show "SM690 SM780" or "SM690,SM780".&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Sat, 25 Jul 2020 01:17:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672249#M202000</guid>
      <dc:creator>gzr2mz39</dc:creator>
      <dc:date>2020-07-25T01:17:27Z</dc:date>
    </item>
    <item>
      <title>Re: extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672253#M202002</link>
      <description>&lt;P&gt;Using a where clause is one way to accomplish this. &lt;CODE class=" language-sas"&gt;where x like 'SM%';&lt;/CODE&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data x;   
     do i=1 to 10; 
       x='small';
       output;
     end; 
     do i=1 to 10; 
       x='SMURF';
       output;
     end; 
     drop i; 
run;
data y; 
    set x;
    where x like 'SM%';
run; &lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 25 Jul 2020 00:50:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672253#M202002</guid>
      <dc:creator>Nicole_Fox</dc:creator>
      <dc:date>2020-07-25T00:50:46Z</dc:date>
    </item>
    <item>
      <title>Re: extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672255#M202003</link>
      <description>&lt;P&gt;The code below should do the job. It is based on SAS sample code found &lt;A href="https://go.documentation.sas.com/?docsetId=lefunctionsref&amp;amp;docsetTarget=n1obc9u7z3225mn1npwnassehff0.htm&amp;amp;docsetVersion=9.4&amp;amp;locale=en" target="_self"&gt;here&lt;/A&gt;.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  infile datalines truncover;
  input text $100.;
  datalines;
SM690 and SM780 occurred at 1:00pm
SM690 and Sm780 occurred at 1:00pm
abSM690 and Sm780 occurred at 1:00pm
;

data want(drop=_:);
  set have;
  length words $100;
  _prxid = prxparse('/\bSM.*?\b/');
  _start = 1;
  _stop = length(text);

  /* Use PRXNEXT to find the first instance of the pattern, */
  /* then use DO WHILE to find all further instances.       */
  /* PRXNEXT changes the _start parameter so that searching  */
  /* begins again after the last match.                     */
  call prxnext(_prxid, _start, _stop, trim(text), _pos, _len);

  do while (_pos &amp;gt; 0);
    words = catx(' ',words,substr(text, _pos, _len));
    call prxnext(_prxid, _start, _stop, trim(text), _pos, _len);
  end;
run;

proc print data=want;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1595643828428.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/47661iB9AD8BCB8F26B464/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Patrick_0-1595643828428.png" alt="Patrick_0-1595643828428.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 25 Jul 2020 02:23:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672255#M202003</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2020-07-25T02:23:57Z</dc:date>
    </item>
    <item>
      <title>Re: extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672275#M202016</link>
      <description>&lt;P&gt;Like this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE;
  infile cards truncover;
  input TEXT $100.;
cards;
SM690 and SM780 occurred at 1:00pm
SM690 and Sm780 occurred at 1:00pm
abSM690 and Sm780 occurred at 1:00pm
run;

data WANT;
  set HAVE;
  WORDS=prxchange('s/(\b(?!SM).*? )/ /',-1,TEXT);
run;

proc print data=WANT;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;DIV class="branch"&gt;
&lt;DIV&gt;
&lt;DIV align="left"&gt;
&lt;TABLE class="table" summary="Procedure Print: Data Set WORK.WANT" frame="box" rules="all" cellspacing="0" cellpadding="5"&gt;
&lt;THEAD&gt;
&lt;TR&gt;
&lt;TH class="r header" scope="col"&gt;Obs&lt;/TH&gt;
&lt;TH class="l header" scope="col"&gt;TEXT&lt;/TH&gt;
&lt;TH class="l header" scope="col"&gt;WORDS&lt;/TH&gt;
&lt;/TR&gt;
&lt;/THEAD&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;1&lt;/TH&gt;
&lt;TD class="l data"&gt;SM690 and SM780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;SM690 SM780&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;2&lt;/TH&gt;
&lt;TD class="l data"&gt;SM690 and Sm780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;SM690&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;3&lt;/TH&gt;
&lt;TD class="l data"&gt;abSM690 and Sm780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 25 Jul 2020 07:32:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672275#M202016</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2020-07-25T07:32:07Z</dc:date>
    </item>
    <item>
      <title>Re: extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672367#M202059</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16961"&gt;@ChrisNZ&lt;/a&gt;&amp;nbsp; How would you amend your RegEx to also capture SM888 from the data below?&lt;/P&gt;
&lt;PRE&gt;SM780,SM888 occurred at 1:00pm and SM999&lt;/PRE&gt;</description>
      <pubDate>Sun, 26 Jul 2020 03:24:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672367#M202059</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2020-07-26T03:24:18Z</dc:date>
    </item>
    <item>
      <title>Re: extracting words from a text field based on their first 2 letters</title>
      <link>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672378#M202067</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/12447"&gt;@Patrick&lt;/a&gt;&amp;nbsp;Not too sure why the \b is not usable here at the end of the word.&lt;/P&gt;
&lt;P&gt;regex101.com suggests it should be a valid expression for the examples given and for your example.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I suppose we could use this:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data HAVE;
  infile cards truncover;
  input TEXT $100.;
  WORDS=prxchange('s/(\b(?!SM).*?\W)/ /',-1,TEXT);
cards;
SM690 and SM780 occurred at 1:00pm
SM690 and Sm780 occurred at 1:00pm
abSM690 and Sm780 occurred at 1:00pm
SM780,SM888 occurred at 1:00pm and SM999
run;

proc print; run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;DIV class="branch"&gt;&lt;A name="IDX" target="_blank"&gt;&lt;/A&gt;
&lt;DIV&gt;
&lt;DIV align="left"&gt;
&lt;TABLE class="table" summary="Procedure Print: Data Set WORK.HAVE" frame="box" rules="all" cellspacing="0" cellpadding="5"&gt;&lt;COLGROUP&gt; &lt;COL /&gt;&lt;/COLGROUP&gt; &lt;COLGROUP&gt; &lt;COL /&gt; &lt;COL /&gt;&lt;/COLGROUP&gt;
&lt;THEAD&gt;
&lt;TR&gt;
&lt;TH class="r header" scope="col"&gt;Obs&lt;/TH&gt;
&lt;TH class="l header" scope="col"&gt;TEXT&lt;/TH&gt;
&lt;TH class="l header" scope="col"&gt;WORDS&lt;/TH&gt;
&lt;/TR&gt;
&lt;/THEAD&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;1&lt;/TH&gt;
&lt;TD class="l data"&gt;SM690 and SM780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;SM690 SM780&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;2&lt;/TH&gt;
&lt;TD class="l data"&gt;SM690 and Sm780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;SM690&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;3&lt;/TH&gt;
&lt;TD class="l data"&gt;abSM690 and Sm780 occurred at 1:00pm&lt;/TD&gt;
&lt;TD class="l data"&gt;&amp;nbsp;&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TH class="r rowheader" scope="row"&gt;4&lt;/TH&gt;
&lt;TD class="l data"&gt;SM780,SM888 occurred at 1:00pm and SM999&lt;/TD&gt;
&lt;TD class="l data"&gt;SM780 SM888 SM999&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 26 Jul 2020 06:58:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/extracting-words-from-a-text-field-based-on-their-first-2/m-p/672378#M202067</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2020-07-26T06:58:17Z</dc:date>
    </item>
  </channel>
</rss>

