<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Read from a text file in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107580#M29923</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I found the most delicate part to be the extraction of sentences from paragraphs. Here is my take on it :&lt;/P&gt;&lt;P&gt;&amp;nbsp; &lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;filename text "&amp;amp;sasforum\datasets\BleakHouse.txt";&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;filename words "&amp;amp;sasforum\datasets\Vocab.txt";&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data paragraphs(keep=parId par);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;retain par;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length par $4000;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;infile text end=flush truncover;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;input;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if missing(_infile_) and not missing(par) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; parId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call missing(par);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;else par = catx(" ", par, _infile_);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if flush and not missing(par) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; parId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data sentences(keep=parId senId sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length sentence $2000; &lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if prx1=0 then prx1 + prxparse("/""?[[:upper:]][^.!?]+[.!?]""?/");&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;set paragraphs;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;sBeg = 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;do until (pos = 0);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call prxnext(prx1, sBeg, -1, par, pos, len);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if pos &amp;gt; 0 then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sentence = catx(" ", sentence, substr(par, pos, len));&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if&amp;nbsp; scan(sentence, -1, " -") not in ("Mr.", "Dr.", "Mrs.") then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* At this point you could filter out very short sentences */&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; senId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call missing(sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if not missing(sentence) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; senId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data vocab;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length word $20;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;infile words truncover firstobs=2;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;input word;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data fancyWords(keep=parId senId word wordPos);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;array words{1000} $20 (" ");&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;retain nbWords;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if missing(nbWords) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do nbWords = 1 to dim(words) by 1 until (endVocab);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; set vocab end=endVocab;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; words{nbWords} = word;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;set sentences;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;lowSent = lowcase(sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;do i = 1 to nbWords;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; word = words{i};&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; wordPos = index(lowSent, trim(word));&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if wordPos &amp;gt; 0 then output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;proc sql;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;select word, sentence&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;from fancyWords natural join sentences&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;order by word, senId;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;quit;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;PG&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Sun, 11 Aug 2013 03:42:15 GMT</pubDate>
    <dc:creator>PGStats</dc:creator>
    <dc:date>2013-08-11T03:42:15Z</dc:date>
    <item>
      <title>Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107577#M29920</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear All,&lt;/P&gt;&lt;P&gt;I am a teacher, I would like to help my students in the following manner with their vocabulary:&lt;/P&gt;&lt;P&gt;I have a list of words that I want to them to learn (say @ “D:\SASFiles\Vocab.txt”). I have a classic book, BleakHouse.txt in the same location. I would like to use SAS to extract sentences from the book for each word in the vocab.txt.&lt;/P&gt;&lt;P&gt;Here is an illustration of the final result.&lt;/P&gt;&lt;TABLE border="0" cellpadding="0" cellspacing="0" width="637"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="95"&gt;&lt;P align="center" style="margin-bottom: .0001pt; text-align: center;"&gt;&lt;STRONG style="color: black;"&gt;alacrity&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="542"&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="color: black;"&gt;At the inn we found Mr. Boythorn on horseback, waiting with an open carriage to take us to his house, which was a few miles off. He was overjoyed to see us and dismounted with great &lt;STRONG&gt;alacrity&lt;/STRONG&gt;.&lt;/SPAN&gt;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="95"&gt;&lt;P align="center" style="margin-bottom: .0001pt; text-align: center;"&gt;&lt;STRONG style="color: black;"&gt;approbation&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="542"&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="color: black;"&gt;In case I should be taking a liberty in putting your ladyship on your guard when there's no necessity for it, you will endeavour, I should hope, to outlive my presumption, and I hall endeavour to outlive your &lt;STRONG&gt;disapprobation&lt;/STRONG&gt;.&lt;/SPAN&gt;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="95"&gt;&lt;P align="center" style="margin-bottom: .0001pt; text-align: center;"&gt;&lt;STRONG style="color: black;"&gt;audacious&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="542"&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="color: black;"&gt;I wouldn't be guilty of the &lt;STRONG&gt;audacious&lt;/STRONG&gt; insolence of keeping a lady of the house waiting all this time for any earthly consideration. I would infinitely rather destroy myself--infinitely rather!&lt;/SPAN&gt;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="95"&gt;&lt;P align="center" style="margin-bottom: .0001pt; text-align: center;"&gt;&lt;STRONG style="color: black;"&gt;capricious&lt;/STRONG&gt;&lt;/P&gt;&lt;/TD&gt;&lt;TD nowrap="nowrap" style="padding: 0 5.4pt 0 5.4pt;" width="542"&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="color: black;"&gt;Too &lt;STRONG&gt;capricious&lt;/STRONG&gt; and imperious in all she does to be the cause of much surprise in those about her as to anything she does, this woman, loosely muffled, goes out into the moonlight.&lt;/SPAN&gt;&lt;/P&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It will be too much of a favor, at least if someone can help to proceed in the right step, it will be highly appreciated:&lt;/P&gt;&lt;OL style="list-style-type: decimal;"&gt;&lt;LI&gt;How to extract sentences from a text files (ebook in txt format).&lt;/LI&gt;&lt;LI&gt;How to compare the two data sets: vocab with words and the dataset with sentences.&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks in advance,&lt;/P&gt;&lt;P&gt;Jijil&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 10 Aug 2013 19:14:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107577#M29920</guid>
      <dc:creator>JAR</dc:creator>
      <dc:date>2013-08-10T19:14:16Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107578#M29921</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi:&lt;/P&gt;&lt;P&gt;&amp;nbsp; I once wrote a program to do a frequency count of the words in Moby Dick. I posted the program here:&lt;/P&gt;&lt;P&gt;&lt;A _jive_internal="true" class="active_link" href="https://communities.sas.com/message/13656#13656"&gt;https://communities.sas.com/message/13656#13656&lt;/A&gt; &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; My text was structured a bit differently than your document, so the program would need tweaking a bit.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; I see a different issue, though. Finding out whether you have a word match is easy. Extracting a sentence is harder. Because the sentence may span multiple lines. Your word of interest could be in the middle of a sentence that spans 4 lines. So that presents an issue...because it will be hard to go -forward and -back in the text file. But at least, if you captured the Chapter number and paragraph number and line number, you could tell approximately which chapter number and paragraph number someone had to look for in the text. I'm not sure that helps you in your quest. I think you could point to an approximate place, but would need to have some manual intervention to detect where the sentence actually started and stopped.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;cynthia&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 10 Aug 2013 21:09:14 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107578#M29921</guid>
      <dc:creator>Cynthia_sas</dc:creator>
      <dc:date>2013-08-10T21:09:14Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107579#M29922</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Reading that book format it paragraphs is not hard.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;STRONG style="color: navy; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;data&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; book;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;length&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; c p l &lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;8&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line pline $&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;80&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; &lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; chapter $&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;30&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; author $&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;50&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; title $&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;50&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; &lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;&amp;nbsp; ;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;retain&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; author title chapter &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; c -&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;1&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; p &lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;0&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; l &lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;0&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; pline &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;infile&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'c:\downloads\BleakHouse.txt'&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;truncover&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; ;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;input&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: teal; background: white;"&gt;$char80.&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line=:&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'***END OF THE PROJECT GUTENBERG EBOOK'&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;stop&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;else&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line=:&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'Title:'&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; and title=&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; title = substr(line,&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;8&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;); &lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;else&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line=:&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'Author:'&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; and author=&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; author = substr(line,&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;9&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;);&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;else&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line in: (&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'PREFACE'&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;,&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;'CHAPTER'&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;) &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;do&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; c+&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;1&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; p=&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;0&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; l=&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;0&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; chapter=line; &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;end&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;else&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; line ^= &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;do&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; (pline=&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;) &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;do&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; p+&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;1&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; l=&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;0&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;; &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;end&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;&amp;nbsp; l+&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;1&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;if&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; chapter ne &lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;then&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;output&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;end&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;pline=line;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG style="color: navy; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;run&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You could put the paragraphs into one long character variable.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;STRONG style="color: navy; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;data&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; paragraphs;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;do&lt;/SPAN&gt; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;until&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;(last.p);&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;set&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; book;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;by&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; c p l;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;length&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; paragraph $&lt;/SPAN&gt;&lt;STRONG style="color: teal; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;30000&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;&amp;nbsp;&amp;nbsp; paragraph = catx(&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: purple; background: white;"&gt;' '&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;,paragraph,line);&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;end&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="margin-bottom: .0001pt;"&gt;&amp;nbsp; &lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: blue; background: white;"&gt;keep&lt;/SPAN&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt; c p paragraph chapter author title ;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG style="color: navy; background: white; font-size: 10.0pt; font-family: 'Courier New';"&gt;run&lt;/STRONG&gt;&lt;SPAN style="font-size: 10.0pt; font-family: 'Courier New'; color: black; background: white;"&gt;;&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 10 Aug 2013 23:24:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107579#M29922</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2013-08-10T23:24:16Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107580#M29923</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I found the most delicate part to be the extraction of sentences from paragraphs. Here is my take on it :&lt;/P&gt;&lt;P&gt;&amp;nbsp; &lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;filename text "&amp;amp;sasforum\datasets\BleakHouse.txt";&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;filename words "&amp;amp;sasforum\datasets\Vocab.txt";&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data paragraphs(keep=parId par);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;retain par;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length par $4000;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;infile text end=flush truncover;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;input;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if missing(_infile_) and not missing(par) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; parId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call missing(par);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;else par = catx(" ", par, _infile_);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if flush and not missing(par) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; parId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data sentences(keep=parId senId sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length sentence $2000; &lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if prx1=0 then prx1 + prxparse("/""?[[:upper:]][^.!?]+[.!?]""?/");&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;set paragraphs;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;sBeg = 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;do until (pos = 0);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call prxnext(prx1, sBeg, -1, par, pos, len);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if pos &amp;gt; 0 then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sentence = catx(" ", sentence, substr(par, pos, len));&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if&amp;nbsp; scan(sentence, -1, " -") not in ("Mr.", "Dr.", "Mrs.") then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* At this point you could filter out very short sentences */&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; senId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call missing(sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if not missing(sentence) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; senId + 1;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data vocab;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;length word $20;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;infile words truncover firstobs=2;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;input word;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;data fancyWords(keep=parId senId word wordPos);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;array words{1000} $20 (" ");&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;retain nbWords;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;if missing(nbWords) then do;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do nbWords = 1 to dim(words) by 1 until (endVocab);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; set vocab end=endVocab;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; words{nbWords} = word;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;set sentences;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;lowSent = lowcase(sentence);&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;do i = 1 to nbWords;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; word = words{i};&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; wordPos = index(lowSent, trim(word));&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if wordPos &amp;gt; 0 then output;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;run;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;proc sql;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;select word, sentence&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;from fancyWords natural join sentences&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;order by word, senId;&lt;/STRONG&gt;&lt;BR /&gt;&lt;STRONG&gt;quit;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt; &lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;PG&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 11 Aug 2013 03:42:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107580#M29923</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2013-08-11T03:42:15Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107581#M29924</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear Tom,&lt;/P&gt;&lt;P&gt;I need some more help. There is one other data set (work.vocab), which has only one variable named 'word'. How do I join this (work.paragraph) with work.vocab?&lt;/P&gt;&lt;P&gt;Let me try to illustrate: &lt;/P&gt;&lt;P&gt;The first word is Alacrity in work.vocab. The first occurrence of this word in work.paragraph is on Chapter XVIII, that is 1864th observation of work.paragraph. How do I join these two datasets with this condition.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you allow me to be greedy and ask for more... for each word paragraph would be an extravaganza, a mere sentence would be nicer... However, I shall be thankful if you could help me with paragraphs.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sincerely,&lt;/P&gt;&lt;P&gt;Jijil&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 11 Aug 2013 04:04:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107581#M29924</guid>
      <dc:creator>JAR</dc:creator>
      <dc:date>2013-08-11T04:04:16Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107582#M29925</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Using your PARAGRAPHS dataset Tom, one more data step just about wraps it up.&amp;nbsp; &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The result is to add the sentence and vocab word to the dataset.&amp;nbsp; Its a bit slow so I was just reading in the first few vocab words.&amp;nbsp; Hope it helps JAR.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data matched_sentences(keep=c p paragraph chapter author title sentence vocab_word);&lt;/P&gt;&lt;P&gt;&amp;nbsp; set paragraphs;&lt;/P&gt;&lt;P&gt;&amp;nbsp; retain prxid i&amp;nbsp; 0;&lt;/P&gt;&lt;P&gt;&amp;nbsp; length sentence $10000;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; if _N_=1 then&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* load a temporary array with the vocab words */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; array vocab(400) $40 _temporary_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do i=1 to 400 until (no_more);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; infile 'c:\temp\vocab.txt' truncover firstobs=2 obs=10 end=no_more;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; input word $40.;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; vocab{i} = word;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* For each paragraph, loop through the vocab words looking for a match */&lt;/P&gt;&lt;P&gt;&amp;nbsp; do word_num = 1 to i;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; prxid = prxparse('/[^!\.\?]*?' || strip(vocab{word_num}) || '.*?[!\.\?]"*/');&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call prxsubstr(prxid,lowcase(paragraph), pos, len);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if pos &amp;gt; 0 then&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sentence = left(substr(paragraph,pos,len));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; vocab_word = vocab{word_num};&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp; end;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Its not perfect as "Mr." will end the sentence for example, but just needs to be a bit smarter in the prxparse.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 11 Aug 2013 05:14:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107582#M29925</guid>
      <dc:creator>JerryLeBreton</dc:creator>
      <dc:date>2013-08-11T05:14:32Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107583#M29926</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear PG,&lt;/P&gt;&lt;P&gt;Your code does it all..... Thank you so much. As I have already give marked Tom's answer correct, I can't change. I sincerely thank you too.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sincerely,&lt;/P&gt;&lt;P&gt;Jijil&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 11 Aug 2013 05:32:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/107583#M29926</guid>
      <dc:creator>JAR</dc:creator>
      <dc:date>2013-08-11T05:32:22Z</dc:date>
    </item>
    <item>
      <title>Re: Read from a text file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/255550#M57070</link>
      <description>&lt;P&gt;Dear PG,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have used the above code on a simple report, is there any possible way that I can do the same thing on multiple reports at a time by merging them and using a seperate column called title based on word search to categorise which document the word was from.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank You&lt;/P&gt;&lt;P&gt;Bhavana&lt;/P&gt;</description>
      <pubDate>Wed, 09 Mar 2016 15:22:46 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Read-from-a-text-file/m-p/255550#M57070</guid>
      <dc:creator>saibhavana</dc:creator>
      <dc:date>2016-03-09T15:22:46Z</dc:date>
    </item>
  </channel>
</rss>

