<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extracting keywords and corresponding number in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386363#M277199</link>
    <description>&lt;P&gt;Building on &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;'s excellent solution, I&amp;nbsp;would simplify, refine and complete as:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Sample input: every record after CARDS is one free-text sentence, read
   whole into the character variable X (at most 200 characters). */
infile cards truncover;
input x $200.;
cards;
text text text text textdimentionstext text text where length, height, width text text text of 30.2mm, 24mm and 55.3mm
text text text text textdimentionstext text text where height, width text text text of  24mm and 55.3mm
text text text text textdimentionstext text text where length, text text text 55.3mm
;

/* Scan every sentence twice: once for the dimension keywords (rows go to K)
   and once for the millimetre measurements (rows go to V).
   GROUP numbers the input record and I numbers the match inside the record,
   so that K and V can later be paired by GROUP and I. */
data k(keep=group i var) v(keep=group i value) ;
 set have;
 group+1;
 s=1;e=length(x);

 /* Pass 1: keywords, matched as whole words, case-insensitively. */
 pid=prxparse('/\b(length|height|width)\b/oi');
 call prxnext(pid,s,e,x,p,l);
 do i = 1 by 1 while(p&amp;gt;0);
  var=substr(x,p,l);
  output k;
  call prxnext(pid,s,e,x,p,l);
 end;

 /* Pass 2: measurements. PRXNEXT advanced S past the last keyword found,
    so rewind to the start of the sentence first. Without the rewind, any
    measurement written before the last keyword (as in
    "length 22.3mm, height 12mm, width 53mm") would never be found. */
 s=1;
 pid=prxparse('/\d+(\.\d*)?\s?mm/oi');
 call prxnext(pid,s,e,x,p,l);
 do i = 1 by 1 while(p&amp;gt;0);
  /* Strip the unit letters and convert the remaining digits to a number. */
  value=input(compress(substr(x,p,l),"mM"),best.);
  output v;
  call prxnext(pid,s,e,x,p,l);
 end;

 run;

/* Pair the i-th keyword of each record with its i-th measurement.
   I is only the pairing key, so it is left out of the result. */
data list(drop=i);
 merge k v;
 by group i;
run;

/* Reshape to one row per record: the keywords in VAR become the column
   names and VALUE supplies the cell contents. */
proc transpose out=table(drop=_name_) data=list;
 by group;
 var value;
 id var;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 08 Aug 2017 18:36:33 GMT</pubDate>
    <dc:creator>PGStats</dc:creator>
    <dc:date>2017-08-08T18:36:33Z</dc:date>
    <item>
      <title>Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386221#M277195</link>
      <description>&lt;P&gt;Hi everyone&lt;/P&gt;
&lt;P&gt;i need help solving the following problem, i think Perl extensions would be the best way to do it, but not sure how...&lt;/P&gt;
&lt;P&gt;I have a text containing information on different body parts.&lt;/P&gt;
&lt;P&gt;The text is written in the following&amp;nbsp;way&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;text&amp;nbsp;text&amp;nbsp;text&lt;/SPAN&gt;&amp;nbsp;&lt;SPAN&gt;text&amp;nbsp;text&lt;/SPAN&gt;dimentions&lt;SPAN&gt;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;&lt;/SPAN&gt;where&amp;nbsp;l&lt;STRONG&gt;ength, height, width&lt;/STRONG&gt;&amp;nbsp;&lt;SPAN&gt;text&amp;nbsp;text&amp;nbsp;text&lt;/SPAN&gt;&amp;nbsp;of&lt;STRONG&gt;&amp;nbsp;30.2mm, 24mm and 55.3mm&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Some of the strings will only have one or two of the three keywords (length, height, width):&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;textdimentionstext&amp;nbsp;text&amp;nbsp;text&amp;nbsp;where&lt;STRONG&gt;&amp;nbsp;height, width&lt;/STRONG&gt;&amp;nbsp;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;of&lt;STRONG&gt;&amp;nbsp; 24mm and 55.3mm&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;or&lt;/P&gt;
&lt;P&gt;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;text&amp;nbsp;textdimentionstext&amp;nbsp;text&amp;nbsp;text&amp;nbsp;where&amp;nbsp;&lt;STRONG&gt;length&lt;/STRONG&gt;,&amp;nbsp;text&amp;nbsp;text&amp;nbsp;text&lt;STRONG&gt;&amp;nbsp;55.3mm&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;So basically:&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;The code will need to search for the occurrence of any of the three words (or two or one of them, depending on what the sentence contains) and then assign the numbers consecutively as they appear in the sentence.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Any help is greatly&amp;nbsp;appreciated&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;The common features in all strings&amp;nbsp;are:&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;1. the keywords are always from a list of the three variables (length, height, width)&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;2. The measurement always ends in the two letters &lt;STRONG&gt;mm&lt;/STRONG&gt;, they might contain a decimal or not (.)&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 08 Aug 2017 12:03:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386221#M277195</guid>
      <dc:creator>ammarhm</dc:creator>
      <dc:date>2017-08-08T12:03:53Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386240#M277197</link>
      <description>&lt;P&gt;Well, first let me advise you that it is highly recommended NOT to use free text for any kind of analysis. &amp;nbsp;I can't stress that enough. &amp;nbsp;&lt;/P&gt;
&lt;P&gt;That being said, it is technically possible to extract the information, but those rules have to be really robust for anything to work:&lt;/P&gt;
&lt;PRE&gt;data want (drop=pos word ind:);
  /* Each data line holds no "~", so with "~" as the delimiter the whole
     line lands in LINE. The line is then walked word by word (blank or
     comma separated); keywords fill LOC1-LOC3 and measurements fill
     DIM1-DIM3 in the order they are met. */
  infile datalines dlm="~";
  length line word $2000;
  input line $;
  array loc{3} $200;
  array dim{3} $200;
  /* Next free slot in LOC and in DIM. */
  ind1=1;
  ind2=1;
  do pos=1 to countw(line," ,");
    word=scan(line,pos," ,");
    /* A keyword goes into the next LOC slot. */
    if word="length" or word="height" or word="width" then do;
      loc{ind1}=word;
      ind1=ind1+1;
    end;
    /* Any word containing "mm" is taken as a measurement. */
    if index(word,"mm") then do;
      dim{ind2}=word;
      ind2=ind2+1;
    end;
  end;
datalines;
aabg thkd length, height, width as abe ddd 24mm and 55.3mm,34mm
trryurturur aswwrqr length, height possioe asad 12mm wert 34mm
;
run;&lt;/PRE&gt;
&lt;P&gt;Again, really not advised.&lt;/P&gt;</description>
      <pubDate>Tue, 08 Aug 2017 12:55:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386240#M277197</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2017-08-08T12:55:19Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386258#M277198</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Sample input: every record after CARDS is one free-text sentence, read
   whole into the character variable X (at most 200 characters). */
infile cards truncover;
input x $200.;
cards;
text text text text textdimentionstext text text where length, height, width text text text of 30.2mm, 24mm and 55.3mm
text text text text textdimentionstext text text where height, width text text text of  24mm and 55.3mm
text text text text textdimentionstext text text where length, text text text 55.3mm
;
run;
/* For every sentence, write one row per match into WANT: first all
   dimension keywords, then all millimetre measurements.
   GROUP numbers the input record so the rows can be regrouped later. */
data want;
 set have;
 group+1;
 s=1;e=length(x);

 /* Pass 1: keywords, matched as whole words, case-insensitively. */
 pid=prxparse('/\b(length|height|width)\b/oi');
 call prxnext(pid,s,e,x,p,l);
 do while(p&amp;gt;0);
  want=substr(x,p,l);
  output;
  call prxnext(pid,s,e,x,p,l);
 end;

 /* Pass 2: measurements. PRXNEXT advanced S past the last keyword found,
    so rewind to the start of the sentence first. Without the rewind, any
    measurement written before the last keyword (as in
    "length 22.3mm, height 12mm, width 53mm") would never be found. */
 s=1;
 pid=prxparse('/\d+(\.\d+)?mm/oi');
 call prxnext(pid,s,e,x,p,l);
 do while(p&amp;gt;0);
  want=substr(x,p,l);
  output;
  call prxnext(pid,s,e,x,p,l);
 end;

 drop pid p l s e;
 run;

 /* Split the matches by content: rows without any digit are keywords (K),
    the others are measurements (V, with WANT renamed to _WANT so the two
    columns do not collide when merged). */
 data k v(rename=(want=_want));
  set want;
  if not anydigit(want) then output k;
  else output v;
 run;
/* Put keywords (WANT) and measurements (_WANT) side by side within each
   GROUP; the source sentence X is not kept. */
data final_want(drop=x);
 merge k v;
 by group;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 08 Aug 2017 13:44:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386258#M277198</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-08-08T13:44:17Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386363#M277199</link>
      <description>&lt;P&gt;Building on &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;'s excellent solution, I&amp;nbsp;would simplify, refine and complete as:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Sample input: every record after CARDS is one free-text sentence, read
   whole into the character variable X (at most 200 characters). */
infile cards truncover;
input x $200.;
cards;
text text text text textdimentionstext text text where length, height, width text text text of 30.2mm, 24mm and 55.3mm
text text text text textdimentionstext text text where height, width text text text of  24mm and 55.3mm
text text text text textdimentionstext text text where length, text text text 55.3mm
;

/* Scan every sentence twice: once for the dimension keywords (rows go to K)
   and once for the millimetre measurements (rows go to V).
   GROUP numbers the input record and I numbers the match inside the record,
   so that K and V can later be paired by GROUP and I. */
data k(keep=group i var) v(keep=group i value) ;
 set have;
 group+1;
 s=1;e=length(x);

 /* Pass 1: keywords, matched as whole words, case-insensitively. */
 pid=prxparse('/\b(length|height|width)\b/oi');
 call prxnext(pid,s,e,x,p,l);
 do i = 1 by 1 while(p&amp;gt;0);
  var=substr(x,p,l);
  output k;
  call prxnext(pid,s,e,x,p,l);
 end;

 /* Pass 2: measurements. PRXNEXT advanced S past the last keyword found,
    so rewind to the start of the sentence first. Without the rewind, any
    measurement written before the last keyword (as in
    "length 22.3mm, height 12mm, width 53mm") would never be found. */
 s=1;
 pid=prxparse('/\d+(\.\d*)?\s?mm/oi');
 call prxnext(pid,s,e,x,p,l);
 do i = 1 by 1 while(p&amp;gt;0);
  /* Strip the unit letters and convert the remaining digits to a number. */
  value=input(compress(substr(x,p,l),"mM"),best.);
  output v;
  call prxnext(pid,s,e,x,p,l);
 end;

 run;

/* Pair the i-th keyword of each record with its i-th measurement.
   I is only the pairing key, so it is left out of the result. */
data list(drop=i);
 merge k v;
 by group i;
run;

/* Reshape to one row per record: the keywords in VAR become the column
   names and VALUE supplies the cell contents. */
proc transpose out=table(drop=_name_) data=list;
 by group;
 var value;
 id var;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 08 Aug 2017 18:36:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386363#M277199</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-08-08T18:36:33Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386546#M277200</link>
      <description>&lt;P&gt;Thank you everyone&lt;/P&gt;
&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;&amp;nbsp;while the code ran perfectly well, I ran into a&amp;nbsp;problem:&lt;/P&gt;
&lt;P&gt;1. The code extracted the last number only; I corrected this by assigning s=1 after the first do-end loop.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Another question: what if the text file looks like this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;text text text text textdimentionstext text text where&lt;STRONG&gt; length 22.3mm, height 12mm, width 53mm&lt;/STRONG&gt; text text text&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there a way to specifically link the number to the keyword just before it? The reason I am asking is that there might be another keyword and a number after it in the middle, and I am not interested in these other keywords:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;text text text text textdimentionstext text text where&lt;/SPAN&gt;&lt;STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;length 22.3mm, height 12mm,&lt;SPAN&gt;&amp;nbsp;&lt;FONT color="#FF0000"&gt;thickness 2.1mm&lt;/FONT&gt;,&amp;nbsp;&lt;/SPAN&gt;width 53mm&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;text text text&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;In the above example i am not interested in thickness&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Kind regards&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2017 10:41:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386546#M277200</guid>
      <dc:creator>ammarhm</dc:creator>
      <dc:date>2017-08-09T10:41:42Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386594#M277201</link>
      <description>&lt;P&gt;If you know all the keyword , you can specify it in&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;pid=prxparse('/\b(length|height|width|thickness)\b/oi');&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Otherwise, you need to write some code to find all these keywords.&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2017 12:33:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386594#M277201</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-08-09T12:33:16Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386597#M277202</link>
      <description>&lt;P&gt;Thanks Ksharp&lt;/P&gt;
&lt;P&gt;To explain further: I know all the keywords....&lt;/P&gt;
&lt;P&gt;The way the code is written, the second part simply extracts the series of numbers in the text as they occur.&lt;/P&gt;
&lt;P&gt;So in the following example, the first part of the code identifies the three keywords which I am interested in, which is very adequate:&lt;/P&gt;
&lt;LI-CODE lang="sas"&gt;pid&lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;prxparse&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;(&lt;/SPAN&gt;&lt;SPAN class="token string"&gt;'/\b(length|height|width|thickness)\b/oi'&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;)&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;/LI-CODE&gt;
&lt;P&gt;on&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;text text text text textdimentionstext text text where&lt;/SPAN&gt;&lt;STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;length 22.3mm, height 12mm,&lt;SPAN&gt;&amp;nbsp;&lt;FONT color="#FF0000"&gt;thickness 2.1mm&lt;/FONT&gt;,&amp;nbsp;&lt;/SPAN&gt;width 53mm&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;text text text&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;will return&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;length, height, width&lt;/STRONG&gt; ( I am not interested in thickness and I dont want it to be extracted)&lt;/P&gt;
&lt;P&gt;BUT&lt;/P&gt;
&lt;P&gt;The second part of the code will pull out 4 series of numbers:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt; pid=prxparse('/\d+(\.\d+)?mm/oi');&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;will return&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;22.3mm, 12mm,&lt;SPAN&gt;&lt;FONT color="#FF0000"&gt;&amp;nbsp;2.1mm&lt;/FONT&gt;,&lt;/SPAN&gt;&amp;nbsp;53mm&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;Now this will cause a mismatch when the data is merged. That is why I am trying to link the number directly to the keyword before it; if that can be done, then&amp;nbsp;&lt;FONT color="#FF0000"&gt;&lt;STRONG&gt;2.1mm&lt;/STRONG&gt;&amp;nbsp;&lt;/FONT&gt;will not be extracted.&lt;/P&gt;
&lt;P&gt;Kind regards&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2017 12:43:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386597#M277202</guid>
      <dc:creator>ammarhm</dc:creator>
      <dc:date>2017-08-09T12:43:36Z</dc:date>
    </item>
    <item>
      <title>Re: Extracting keywords and corresponding number</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386601#M277203</link>
      <description>&lt;P&gt;Yeah. Specify all the keywords in&amp;nbsp;&lt;/P&gt;
&lt;PRE class="lia-code-sample  language-sas"&gt;&lt;CODE class=" language-sas"&gt;pid&lt;SPAN class="token operator"&gt;=&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;prxparse&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;(&lt;/SPAN&gt;&lt;SPAN class="token string"&gt;'/\b(length|height|width|thickness)\b/oi'&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;)&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;that would avoid such kind of error.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Or use PG's code. If any one of V or Value is missing, that means there is a problem in that record.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2017 12:55:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Extracting-keywords-and-corresponding-number/m-p/386601#M277203</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-08-09T12:55:26Z</dc:date>
    </item>
  </channel>
</rss>

