<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: can sas identify a word or component in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325564#M72407</link>
    <description>&lt;P&gt;Within SAS as a programming tool you can analyze any text.&lt;/P&gt;
&lt;P&gt;I don't know whether there is a ready-made SAS system to do what you want, and&lt;/P&gt;
&lt;P&gt;even if there is - it should be programmed specifically for the language&lt;/P&gt;
&lt;P&gt;you are interested in.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Have you ever used Google Translate? - if yes, then you know that analyzing text&lt;/P&gt;
&lt;P&gt;and translating it to another language (that is, transforming from one language's grammar to another)&lt;/P&gt;
&lt;P&gt;is very complicated and not very accurate.&lt;/P&gt;</description>
    <pubDate>Wed, 18 Jan 2017 05:54:05 GMT</pubDate>
    <dc:creator>Shmuel</dc:creator>
    <dc:date>2017-01-18T05:54:05Z</dc:date>
    <item>
      <title>can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325561#M72406</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp;I am wondering whether SAS can identify &amp;nbsp;a word which exists in the dictionary, not just created.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp;or if it can analyse the components of sentences, as I want to extract the nouns and delete other components such as attributes.&lt;/P&gt;&lt;P&gt;&amp;nbsp; The sentences include no clauses.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 05:22:38 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325561#M72406</guid>
      <dc:creator>JNWong</dc:creator>
      <dc:date>2017-01-18T05:22:38Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325564#M72407</link>
      <description>&lt;P&gt;Within SAS as a programming tool you can analyze any text.&lt;/P&gt;
&lt;P&gt;I don't know whether there is a ready-made SAS system to do what you want, and&lt;/P&gt;
&lt;P&gt;even if there is - it should be programmed specifically for the language&lt;/P&gt;
&lt;P&gt;you are interested in.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Have you ever used Google Translate? - if yes, then you know that analyzing text&lt;/P&gt;
&lt;P&gt;and translating it to another language (that is, transforming from one language's grammar to another)&lt;/P&gt;
&lt;P&gt;is very complicated and not very accurate.&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 05:54:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325564#M72407</guid>
      <dc:creator>Shmuel</dc:creator>
      <dc:date>2017-01-18T05:54:05Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325567#M72408</link>
      <description>&lt;P&gt;Thank you I &amp;nbsp;agree with what you said.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 06:32:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325567#M72408</guid>
      <dc:creator>JNWong</dc:creator>
      <dc:date>2017-01-18T06:32:44Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325585#M72419</link>
      <description>&lt;P&gt;Are you working with Base SAS or EM with Text Analytics?&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 09:01:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325585#M72419</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2017-01-18T09:01:46Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325586#M72420</link>
      <description>&lt;P&gt;sas base.&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 09:03:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325586#M72420</guid>
      <dc:creator>JNWong</dc:creator>
      <dc:date>2017-01-18T09:03:42Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325695#M72475</link>
      <description>&lt;P&gt;You will have to supply the logic for determining if a word is a noun or not; it may be a noun, verb or even proper name.&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 14:56:11 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325695#M72475</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2017-01-18T14:56:11Z</dc:date>
    </item>
    <item>
      <title>Re: can sas identify a word or component</title>
      <link>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325810#M72500</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;SAS Forum: Is it a valid word and is it a noun, adjective, pronoun..

inspired
https://goo.gl/u5muLG
https://communities.sas.com/t5/Base-SAS-Programming/can-sas-identify-a-word-or-component/m-p/325561


Two parts

1. T1001520 Is it a valid word
2. T0099390 Natural Language Processing is it a noun, adjective, pronoun..


HAVE A LIST OF WORDS IN A TEXT FILE
===================================

data _null_;
  file "d:/txt/havewords.txt";
  put 'TOMMORROW';
  put 'TOMOROW';
run;quit;


WANT
====

File: "MYWORDS"

  Unrecognized word               Freq     Line(s)

  TOMMORROW                        1       2
        Suggestions: TOMORROW

  TOMOROW                          1       3
        Suggestions: TOMORROW


SOLUTION
========

filename mywords "d:/txt/havewords.txt";
data _null_;
  file "d:/txt/havewords.txt";
  put 'TOMMORROW';
  put 'TOMOROW';
run;quit;

PROC Spell in= mywords
               verify
               suggest;
run;quit;

NOW IF YOU WANT ANOTHER DICTIONARY
===================================

go to and download
http://wordlist.sourceforge.net/

Here is a dictionary of words beginning with 'TOMO'

"d:/txt/tomos.txt"

WRD

TOMOGRAM
TOMOGRAMS
TOMOGRAPH
TOMOGRAPHIC
TOMOGRAPHIES
TOMOGRAPHS
TOMOGRAPHY
TOMOLO
TOMOMANIA
TOMORN
TOMORROW
TOMORROWER
TOMORROWING
TOMORROWNESS
TOMORROWS
TOMOSIS

CREATE THE DICTIONARY of 'TOMO's

PROC Spell words  = "d:/txt/tomos.txt"
           create
           dict = work.mycatalog.spell;
run;quit;

* use the dictionary with misspellings;
PROC Spell in= mywords
               verify
               suggest
               dict = work.mycatalog.spell
;
run;quit;

/* T0099390 Natural Language Processing is it a noun, adjective, pronoun..

HAVE
====

options validvarname=upcase;

data "d:/sd1/txt.sas7bdat";
  length txt $255;
  txt=catx(
     ' '
    ,'Pierre Vinken, 61 years old, will join the board as a'
    ,'nonexecutive director Nov. 29.\n'
    ,'Mr. Vinken is chairman of Elsevier N.V.,'
    ,'the Dutch publishing group.');
  putlog txt;
run;quit;

WANT  Words are tagged with frequencies
========================================

Frequencies of nouns, pronouns, verbs ...

  ,   .  CD  DT  IN  JJ  MD  NN NNP NNS  VB VBZ
  3   2   2   3   2   3   1   5   7   1   1   1

 [1] "Pierre/NNP"      "Vinken/NNP"      ",/,"             "61/CD"
 [5] "years/NNS"       "old/JJ"          ",/,"             "will/MD"
 [9] "join/VB"         "the/DT"          "board/NN"        "as/IN"
[13] "a/DT"            "nonexecutive/JJ" "director/NN"     "Nov./NNP"
[17] "29/CD"           "./."             "Mr./NNP"         "Vinken/NNP"
[21] "is/VBZ"          "chairman/NN"     "of/IN"           "Elsevier/NNP"
[25] "N.V./NNP"        ",/,"             "the/DT"          "Dutch/JJ"
[29] "publishing/NN"   "group/NN"        "./."


CC     Coordinating conjunction
CD     Cardinal number
DT     Determiner
EX     Existential there
FW     Foreign word
IN     Preposition or subordinating conjunction
JJ     Adjective
JJR    Adjective, comparative
JJS    Adjective, superlative
LS     List item marker
MD     Modal
NN     Noun, singular or mass
NNS    Noun, plural
NNP    Proper noun, singular
NNPS   Proper noun, plural
PDT    Predeterminer
POS    Possessive ending
PRP    Personal pronoun
PRP$   Possessive pronoun
RB     Adverb
RBR    Adverb, comparative
RBS    Adverb, superlative
RP     Particle
SYM    Symbol
UH     Interjection
VB     Verb, base form
VBD    Verb, past tense
VBG    Verb, gerund or present participle
VBN    Verb, past participle
VBP    Verb, non-3rd person singular present
VBZ    Verb, 3rd person singular present
WDT    Wh-determiner
WP     Wh-pronoun
WP$    Possessive wh-pronoun
WRB    Wh-adverb

SOLUTION

%utl_submit_r64(
library(stringr);
library(NLP);
library(openNLP);
library(openNLPmodels.en);
library(haven);
txt&amp;lt;-read_sas('d:/sd1/txt.sas7bdat');
txt;
s &amp;lt;- as.String(txt$TXT);
sent_token_annotator &amp;lt;- Maxent_Sent_Token_Annotator();
word_token_annotator &amp;lt;- Maxent_Word_Token_Annotator();
a2 &amp;lt;- annotate(s, list(sent_token_annotator, word_token_annotator));
pos_tag_annotator &amp;lt;- Maxent_POS_Tag_Annotator();
pos_tag_annotator;
a3 &amp;lt;- annotate(s, pos_tag_annotator, a2);
a3;
head(annotate(s, Maxent_POS_Tag_Annotator(probs = TRUE), a2));
a3w &amp;lt;- subset(a3, type == 'word');
tags &amp;lt;- sapply(a3w$features, `[[`, 'POS');
tags;
table(tags);
sprintf('%s/%s', s[a3w], tags);
);

&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 18 Jan 2017 21:10:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/can-sas-identify-a-word-or-component/m-p/325810#M72500</guid>
      <dc:creator>rogerjdeangelis</dc:creator>
      <dc:date>2017-01-18T21:10:26Z</dc:date>
    </item>
  </channel>
</rss>

