<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: String Parsing - how to extract words between comma and dot in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956861#M373573</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Seems like this can work for TYPE B,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Data next ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;set have ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;xx = scan(data,-1,"/") ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;Array x_ [40] $55 x1-x40 ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; do i = 1 to 40 ;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; &amp;nbsp;x_[i] = scan(scan(xx,-i,','),1,'.') ; ;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; end ;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; drop i xx data ;&lt;/DIV&gt;
&lt;DIV&gt;Run ;&amp;nbsp;&lt;/DIV&gt;</description>
    <pubDate>Wed, 22 Jan 2025 09:04:22 GMT</pubDate>
    <dc:creator>J111</dc:creator>
    <dc:date>2025-01-22T09:04:22Z</dc:date>
    <item>
      <title>String Parsing - how to extract words between comma and dot</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956851#M373569</link>
      <description>&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Hello,&lt;/DIV&gt;
&lt;DIV&gt;Please find below data &lt;STRONG&gt;Have&lt;/STRONG&gt; and data &lt;STRONG&gt;Want&lt;/STRONG&gt;.&lt;/DIV&gt;
&lt;DIV&gt;
&lt;P&gt;&lt;BR /&gt;Data &lt;STRONG&gt;Have&lt;/STRONG&gt; ;&lt;BR /&gt;input data $100. ;&lt;BR /&gt;cards ;&lt;BR /&gt;data/dataflow/BPS.STQR/EZR/1.0/&lt;STRONG&gt;RER_QWE_MED&lt;/STRONG&gt;.D.AUD.MED,&amp;nbsp;&lt;STRONG&gt;RER_QWA_MED&lt;/STRONG&gt;.D.CAD.WOW.WOW.OF00,&amp;nbsp; &lt;STRONG&gt;QWZ_PLO_WOW&lt;/STRONG&gt;.D.POP&lt;BR /&gt;;&lt;BR /&gt;Run ;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Data &lt;STRONG&gt;Want&lt;/STRONG&gt; ;&lt;BR /&gt;input x1 $60. ;&lt;BR /&gt;cards ;&lt;BR /&gt;RER_QWE_MED , RER_QWA_MED , QWZ_PLO_WOW&lt;BR /&gt;;&lt;BR /&gt;Run ;&lt;/P&gt;
&lt;/DIV&gt;
&lt;DIV&gt;We would like to extract X1 from the data according to the following rules for Type A and Type B:&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;U&gt;Type A&lt;/U&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;1. it appears after the last forward slash in the sentence&lt;/DIV&gt;
&lt;DIV&gt;2. and also before the first dot following this word (if there is a dot)&lt;/DIV&gt;
&lt;DIV&gt;3. and this word contains only capital letters or underscore in it (without small letters)&lt;/DIV&gt;
&lt;DIV&gt;4. if such a word is not found write 'NOWORD'&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;U&gt;Type B&lt;/U&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;here we have to extract more than one word and the logic is as follows:&lt;/DIV&gt;
&lt;DIV&gt;1. it appears after the last forward slash in the sentence&lt;/DIV&gt;
&lt;DIV&gt;2. continue in the same row until we find a comma and then take the word which is between the comma and the next first dot.&lt;/DIV&gt;
&lt;DIV&gt;Repeat this for every word that lies between a comma and the next dot (as long as we are after the last forward slash in the row).&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;the purpose is to clean relevant words from garbage, for example '.CAD.' is garbage.&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;for type A we have the solution&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; x1 = scan(scan(data,-1,"/"),1,'.') ; *after last forward slash and before first period ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; if count(x1,lowcase(x1)) = 1 then x1 = 'NOWORD' ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;We need a solution for type B&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;best solution would handle both type A and type B in the same row.&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;------------------------------------------------------&lt;/DIV&gt;
&lt;DIV&gt;BTW Next step we split the want data by delims.&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Data Audit_Split ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;set Audit_short ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;delims = '+,' ;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;Array s_ [40] $55 s1-s40 ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; do i = 1 to 40 ;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; &amp;nbsp;s_[i] = left(scan(string, i, delims)) ;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; end ;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;drop delims i ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Run ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;----------------------------------------------------------------------------------------------------------&lt;/DIV&gt;
&lt;DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/DIV&gt;
&lt;DIV&gt;Thanks in advance.&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;</description>
      <pubDate>Wed, 22 Jan 2025 09:13:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956851#M373569</guid>
      <dc:creator>J111</dc:creator>
      <dc:date>2025-01-22T09:13:43Z</dc:date>
    </item>
    <item>
      <title>Re: String Parsing - how to extract words between comma and dot</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956861#M373573</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Seems like this can work for TYPE B,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;Data next ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;set have ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;xx = scan(data,-1,"/") ;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; &amp;nbsp;Array x_ [40] $55 x1-x40 ;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; do i = 1 to 40 ;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; &amp;nbsp; &amp;nbsp;x_[i] = scan(scan(xx,-i,','),1,'.') ; ;&amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt; end ;&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&amp;nbsp; drop i xx data ;&lt;/DIV&gt;
&lt;DIV&gt;Run ;&amp;nbsp;&lt;/DIV&gt;</description>
      <pubDate>Wed, 22 Jan 2025 09:04:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956861#M373573</guid>
      <dc:creator>J111</dc:creator>
      <dc:date>2025-01-22T09:04:22Z</dc:date>
    </item>
    <item>
      <title>Re: String Parsing - how to extract words between comma and dot</title>
      <link>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956941#M373603</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/386728"&gt;@J111&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;You've got already a solution so below just for your consideration.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You wrote: "&lt;SPAN&gt;best solution would handle both type A and type B in the same row".&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;In the code below I assumed that type B should also select only terms that consist solely of upper-case letters and underscores. If that's the case, then you can "merge" your two types into a single rule.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Below should return your desired result based on my understanding of your rules.&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data Have ;
	/* Sample input: one source_string value per in-stream data line.
	   The rows below exercise a path whose last segment is not a word,
	   a single word, a comma-separated list of dotted terms, a value
	   without any slash, and a mixed-case term. */
	/* TRUNCOVER keeps a line that is shorter than the 600-character informat
	   on its own record instead of reading on into the next line. */
	infile datalines4 truncover;
	input source_string $600. ;
datalines4 ;
/availability/dataflow/*/*/*/*/-
data/dataflow/BPS.STQR/WAB/1.0/NER_PGTABC
/data/dataflow/BPS.STQR/EZR/1.0/RER_QWE_MED.D.AUD.MED.MED.OF00,RER_QWA_MED.D.CAD.WOW.WOW.OF00,QWZ_PLO_WOW.D.POP.WOW.MED.OF00
NOSLASH.AT.ALL
/SLASH_bUT_mIxed_Case.B.C
;;;;
Run;

/* Build WORDS for every row of HAVE: a comma-separated list of the terms that
   follow the last forward slash of source_string, each cut at its first full
   stop and kept only when it consists of upper case letters and underscores.
   Rows without any qualifying term get the literal NOWORD.
   All working variables start with an underscore and are removed from the
   output by the drop=_: data set option. */
data want(drop=_:);
	set have;
	/* substring after last forward slash if any, else just whole string */
	_s=scan(source_string,-1,'/');
	
	/* within _s iterate for each substring that ends with a comma or is the last substring */
	/* number of commas plus one is the number of comma-separated substrings to visit */
	do _i=1 to countc(_s,',')+1;
		/* LENGTH is a declaration, not an executable statement, so placing it inside
		   the loop does not repeat it; it fixes _word at 50 and words at 200 characters.
		   NOTE(review): longer terms or longer result lists would not fit - confirm
		   these sizes against the real data. */
		length _word $50 words $200;
		/* extract the first sub-string that ends with a full stop else the whole string */
		_word=scan(scan(_s,_i,','),1,'.');
		/* check that _word only contains upper case characters or the underscore */
		/* strip() removes the blank padding of the fixed-length variable so the
		   end-of-string anchor in the pattern can match */
		if prxmatch('/^[A-Z_]+$/',strip(_word))=1 then
			do;
				/* append the accepted term; words is not read from HAVE, so it
				   starts out missing for every row */
				words=catx(',',words,_word);
			end;
	end;
	/* no term passed the check for this row */
	if missing(words) then words='NOWORD';
run;

/* List the WANT data set so the extracted words can be checked row by row. */
proc print data=want;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Patrick_0-1737589930139.png" style="width: 1137px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/103911iD343525E1062AE08/image-dimensions/1137x126?v=v2" width="1137" height="126" role="button" title="Patrick_0-1737589930139.png" alt="Patrick_0-1737589930139.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 22 Jan 2025 23:52:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/String-Parsing-how-to-extract-words-between-comma-and-dot/m-p/956941#M373603</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2025-01-22T23:52:37Z</dc:date>
    </item>
  </channel>
</rss>

