<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Can figure out rudimentary text mining in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539219#M148534</link>
    <description>&lt;P&gt;Elegant - thanks!&lt;/P&gt;</description>
    <pubDate>Thu, 28 Feb 2019 05:55:08 GMT</pubDate>
    <dc:creator>edouglasa</dc:creator>
    <dc:date>2019-02-28T05:55:08Z</dc:date>
    <item>
      <title>Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539217#M148532</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I have never used any of the SAS data mining tools. I have a simple problem. I have 500 legal texts. I want to search to see if they have ten specific words and produce output showing which laws have which words. Can SAS do this? Can anyone point me to the right platform or code?&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 05:36:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539217#M148532</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-02-28T05:36:03Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539218#M148533</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Step 1: load the documents, one text per observation. */
data have;
    infile datalines dlm=',';
    input text : $2000.;
    datalines;
we the people of UK
democracy is by the people
I love egg
;
run;

/* Step 2: for each search word, record where it occurs in the text.
   FINDW gives the position of the word, or 0 when it is absent. */
data want;
    set have;
    word_flag1 = findw(text, 'UK', ' ');
    word_flag2 = findw(text, 'people', ' ');
    /* Repeat for each remaining search word, for example:
    word_flag3 = findw(text, '****', ' ');
    word_flag4 = findw(text, '****', ' ');
    */
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 28 Feb 2019 05:47:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539218#M148533</guid>
      <dc:creator>Satish_Parida</dc:creator>
      <dc:date>2019-02-28T05:47:29Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539219#M148534</link>
      <description>&lt;P&gt;Elegant - thanks!&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 05:55:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539219#M148534</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-02-28T05:55:08Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539220#M148535</link>
      <description>&lt;P&gt;But only one question. Could I read the files from a directory rather than pasting them in?&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 05:56:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539220#M148535</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-02-28T05:56:14Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539224#M148537</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/264464"&gt;@edouglasa&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;But only one question. Could I read the files from a directory rather than pasting them in?&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;To be sure that I understood your question: Do you want to read the words you search from a file?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 06:37:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539224#M148537</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2019-02-28T06:37:00Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539285#M148567</link>
      <description>&lt;P&gt;I'd like to search for terms across Word documents or txt files in a directory. I'd like output that indicates every time which of the search terms is in which Word or txt file.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks so much!&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 13:40:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539285#M148567</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-02-28T13:40:26Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539297#M148570</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/264464"&gt;@edouglasa&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;I'd like to search for terms across Word documents or txt files in a directory. I'd like output that indicates every time which of the search terms is in which Word or txt file.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks so much!&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;I don't think that reading Word documents is possible with SAS, at least not without wasting an incredible amount of time.&lt;/P&gt;
&lt;P&gt;Can you post an example of the expected result-dataset?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 14:10:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539297#M148570</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2019-02-28T14:10:00Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539302#M148573</link>
      <description>&lt;P&gt;Got it. Maybe I need to find a more robust data mining software package.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I was looking for an output like:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Text_name, word1, word2, word3&lt;/P&gt;&lt;P&gt;text1, yes, yes, no&lt;/P&gt;&lt;P&gt;text2, yes, no, no&lt;/P&gt;&lt;P&gt;text3, yes, no, no&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;So the first three texts would all have word1, only text1 would have word2 and none of the texts would have word3. I was hoping for a way to access the text files from a folder on my machine.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 14:18:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539302#M148573</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-02-28T14:18:09Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539327#M148588</link>
      <description>&lt;P&gt;Accessing all text files in a folder is easy:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Read every line of every .txt file under PATH and keep the lines that
   contain at least one of the search words. */
data work.Matches;
    /* _filename gets an explicit length because a FILENAME= variable
       otherwise defaults to 8 characters and the file path is truncated. */
    length
        _filename filename $ 200
        text_name $ 1000
        word1-word3 $ 3
    ;

    /* FILENAME= stores the path of the file currently being read in _filename. */
    infile "PATH/*.txt" filename=_filename;
    input;   /* load the current line into _infile_ without creating variables */

    text_name = _infile_;
    word1 = ifc(findw(text_name, 'WORD_A'), 'yes', 'no');
    word2 = ifc(findw(text_name, 'WORD_BEE'), 'yes', 'no');
    word3 = ifc(findw(text_name, 'WORD_C'), 'yes', 'no');

    /* Output only the lines where at least one word flag is yes. */
    if index(cats(of word:), 'yes') then do;
        /* Copy to a regular variable: the FILENAME= variable itself is
           not written to the output data set. */
        filename = _filename;
        output;
    end;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This code is untested.&amp;nbsp; It should create a dataset containing all lines in which at least one word was found.&lt;/P&gt;</description>
      <pubDate>Thu, 28 Feb 2019 14:55:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/539327#M148588</guid>
      <dc:creator>andreas_lds</dc:creator>
      <dc:date>2019-02-28T14:55:20Z</dc:date>
    </item>
    <item>
      <title>Re: Can figure out rudimentary text mining</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/541488#M149504</link>
      <description>&lt;P&gt;This is amazing. Thanks a ton!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 08 Mar 2019 17:29:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Can-figure-out-rudimentary-text-mining/m-p/541488#M149504</guid>
      <dc:creator>edouglasa</dc:creator>
      <dc:date>2019-03-08T17:29:12Z</dc:date>
    </item>
  </channel>
</rss>

