<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Data Extraction in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57213#M15976</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;What output do you want ? You don't give us an example.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Mon, 20 Feb 2012 02:34:50 GMT</pubDate>
    <dc:creator>Ksharp</dc:creator>
    <dc:date>2012-02-20T02:34:50Z</dc:date>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57210#M15973</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I want to extract information from a list of about 5000 observations. They are all in text and currently downloaded into Excel sheets.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here are a few examples of them:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;1. 20% PEG4000, 200mM Sodiumchloride, 50mM MES, 0.3% dioxane, pH 6.0, VAPOR DIFFUSION, HANGING DROP, temperature 310K&lt;/P&gt;&lt;P&gt;2. PEG3350, LiCl, Tris, pH 9.0, VAPOR DIFFUSION, HANGING DROP, temperature 295.0K&lt;/P&gt;&lt;P&gt;3. PEG3350, MgOAc, Na Cacodylate, TCEP, pH 6.9, VAPOR DIFFUSION, SITTING DROP, temperature 280K&lt;/P&gt;&lt;P&gt;4. CRYSTALLISED FROM HANGING DROP WHICH ALSO CONTAINED 50MM Potassiumphosphate, 5MM DTT AND 30% SATURATED AMMONIUMSULFATE, PH 7.0. THE WELL SOLUTION WAS 45% SATURATED AMMONIUMSULFATE., vapor diffusion - hanging drop&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I want to extract the concentrations of individual chemicals from the lists like 20%PEG4000, 200 mM Sodiumchloride, 50 mM MES as different variables (first three examples) and 45% saturated ammoniumsulfate and 50MM Potassiumphosphate (I find this more challenging).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Could anyone provide some insights?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;&lt;P&gt;Hari &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 17 Feb 2012 14:48:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57210#M15973</guid>
      <dc:creator>hnam</dc:creator>
      <dc:date>2012-02-17T14:48:20Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57211#M15974</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hari,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is just one small piece of the puzzle.&amp;nbsp; In any data of this sort, expect to find variations in spelling for the same compound.&amp;nbsp; You'll find many spellings for Ammonium Sulfate, for example.&amp;nbsp; If you want to categorize them, you'll need to be able to translate from what's in the data to a standard term.&amp;nbsp; This often involves setting up a format to translate all known spellings, running it against the variables, and seeing what hasn't yet been translated by the format.&amp;nbsp; Define the format using upper case only, and try to pass the upper-cased version of your variables through the format.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Good luck.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 17 Feb 2012 14:57:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57211#M15974</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2012-02-17T14:57:54Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57212#M15975</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Given your 4 examples, what do you hope to have for each as a result?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 17 Feb 2012 15:12:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57212#M15975</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2012-02-17T15:12:06Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57213#M15976</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;What output do you want ? You don't give us an example.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 20 Feb 2012 02:34:50 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57213#M15976</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-02-20T02:34:50Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57214#M15977</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Well, what I want is:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I need the percentage and the concentrations of each of the chemical constituents.&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Hari&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 20 Feb 2012 17:42:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57214#M15977</guid>
      <dc:creator>hnam</dc:creator>
      <dc:date>2012-02-20T17:42:44Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57215#M15978</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Given your 4 examples, please show the exact values you would expect to extract to represent the chemical constituents.&amp;nbsp; The analysis part is easy.&amp;nbsp; However, to write code to extract values, one has to know what is supposed to be extracted.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 20 Feb 2012 17:58:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57215#M15978</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2012-02-20T17:58:28Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57216#M15979</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Are you looking to accomplish something like the following?:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data have (drop=i x temp);&lt;/P&gt;&lt;P&gt;&amp;nbsp; length temp contents $80;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input;&lt;/P&gt;&lt;P&gt;&amp;nbsp; record+1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; i=1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; do while (scan(_infile_,i,",-") ne "");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=upcase(scan(_infile_,i,",-"));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; x=index(contents,". THE");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if x gt 0 then do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; temp=substr(contents,x+2);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=strip(substr(contents,1,x-1));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=temp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; x=index(contents," AND ");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if x gt 0 then do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; temp=substr(contents,x+5);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=strip(substr(contents,1,x-1));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=temp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if length(contents) gt 30 then do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; x=anydigit(contents);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
temp=contents;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=substr(contents,x);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; contents=strip(contents);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if substr(contents,length(contents),1) eq "."&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; then contents=substr(contents,1,length(contents)-1);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; i+1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp; cards;&lt;/P&gt;&lt;P&gt;20% PEG4000, 200mM Sodiumchloride, 50mM MES, 0.3% dioxane, pH 6.0, VAPOR DIFFUSION, HANGING DROP, temperature 310K&lt;/P&gt;&lt;P&gt;PEG3350, LiCl, Tris, pH 9.0, VAPOR DIFFUSION, HANGING DROP, temperature 295.0K&lt;/P&gt;&lt;P&gt;PEG3350, MgOAc, Na Cacodylate, TCEP, pH 6.9, VAPOR DIFFUSION, SITTING DROP, temperature 280K&lt;/P&gt;&lt;P&gt;CRYSTALLISED FROM HANGING DROP WHICH ALSO CONTAINED 50MM Potassiumphosphate, 5MM DTT AND 30% SATURATED AMMONIUMSULFATE, PH 7.0. THE WELL SOLUTION WAS 45% SATURATED AMMONIUMSULFATE., vapor diffusion - hanging drop&lt;/P&gt;&lt;P&gt;;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc freq data=have;&lt;/P&gt;&lt;P&gt;&amp;nbsp; tables contents;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 20 Feb 2012 19:31:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57216#M15979</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2012-02-20T19:31:43Z</dc:date>
    </item>
    <item>
      <title>Data Extraction</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57217#M15980</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Is this what you are looking for?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;data want(keep=found);
input;
ExpressionID = prxparse('/((\d|\.)+\s*(mm|mM|Mm|MM)\s*\w+)|((\d|\.)+\s*%\s*(\w|\s)+)/o');
start = 1;
stop = length(_infile_);
call prxnext(ExpressionID, start, stop, _infile_, position, length);
do while (position &amp;gt; 0);
found = substr(_infile_, position, length);
output;
call prxnext(ExpressionID, start, stop, _infile_, position, length);
end;
cards;
20% PEG4000, 200mM Sodiumchloride, 50mM MES, 0.3% dioxane, pH 6.0, VAPOR DIFFUSION, HANGING DROP, temperature 310K
PEG3350, LiCl, Tris, pH 9.0, VAPOR DIFFUSION, HANGING DROP, temperature 295.0K
PEG3350, MgOAc, Na Cacodylate, TCEP, pH 6.9, VAPOR DIFFUSION, SITTING DROP, temperature 280K
CRYSTALLISED FROM HANGING DROP WHICH ALSO CONTAINED 50MM Potassiumphosphate, 5MM DTT AND 30% SATURATED AMMONIUMSULFATE, PH 7.0. THE WELL SOLUTION WAS 45% SATURATED AMMONIUMSULFATE., vapor diffusion - hanging drop
; run;




&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 21 Feb 2012 10:41:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Data-Extraction/m-p/57217#M15980</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-02-21T10:41:07Z</dc:date>
    </item>
  </channel>
</rss>

