<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to read from pdf to SAS in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859163#M339472</link>
    <description>&lt;P&gt;Exactly.&amp;nbsp; How can you bring in things which aren't text?&amp;nbsp; A checkbox could be a PDF object, or a picture.&amp;nbsp; At the end of the day, you could learn JavaScript in PDF and export it, or possibly find a third-party library to get it (perhaps, from a quick search: &lt;A href="https://stackoverflow.com/questions/55777812/python-pdf-how-to-read-from-form-elements-like-checkbox" target="_blank"&gt;https://stackoverflow.com/questions/55777812/python-pdf-how-to-read-from-form-elements-like-checkbox&lt;/A&gt;), but it's going to take a fair bit of effort.&amp;nbsp; Going back to the source data is really the best option.&lt;/P&gt;</description>
    <pubDate>Thu, 16 Feb 2023 11:17:54 GMT</pubDate>
    <dc:creator>RW9</dc:creator>
    <dc:date>2023-02-16T11:17:54Z</dc:date>
    <item>
      <title>How to read from pdf to SAS</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859116#M339454</link>
      <description>&lt;P&gt;Dear Friends&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Is it possible to read the contents of a signed, scanned PDF copy into SAS PROC SQL? Please help.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Feb 2023 05:40:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859116#M339454</guid>
      <dc:creator>SASUserRocks</dc:creator>
      <dc:date>2023-02-16T05:40:53Z</dc:date>
    </item>
    <item>
      <title>Re: How to read from pdf to SAS</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859125#M339458</link>
      <description>&lt;P&gt;You need first to convert your PDF image to text. You then can parse this text using a SAS data step to extract what you need and store it in a SAS table. You need a table to use SQL.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Tika is open-source software that can do such a conversion to text. I believe SAS uses Tika (or at least used to) for this as part of Text Analytics.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Feb 2023 07:57:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859125#M339458</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2023-02-16T07:57:06Z</dc:date>
    </item>
    <item>
      <title>Re: How to read from pdf to SAS</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859138#M339464</link>
      <description>&lt;P&gt;PDF is a proprietary binary file.&amp;nbsp; SAS cannot read much else other than the plain binary code which, unless you understand and can parse it, is of no use.&amp;nbsp; PDF is not a data format, it's a render destination.&amp;nbsp; Whilst there are certain packages in Python/R and other languages to do certain things, even if you do manage to get anything useful out, it would require full QC and probably a lot of work/processing.&amp;nbsp; I would highly recommend either returning to source data, which is the preferred method, or, worst case, typing things in manually.&amp;nbsp; Ultimately PDF is a dreadful format, as far from open as possible, so avoid it as much as possible.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Feb 2023 08:48:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859138#M339464</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2023-02-16T08:48:52Z</dc:date>
    </item>
    <item>
      <title>Re: How to read from pdf to SAS</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859147#M339467</link>
      <description>&lt;P&gt;Thanks for the feedback provided. Let's say I can convert the PDF to text. How can I then bring a check box tick into the text file?&lt;/P&gt;</description>
      <pubDate>Thu, 16 Feb 2023 10:14:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859147#M339467</guid>
      <dc:creator>SASUserRocks</dc:creator>
      <dc:date>2023-02-16T10:14:09Z</dc:date>
    </item>
    <item>
      <title>Re: How to read from pdf to SAS</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859163#M339472</link>
      <description>&lt;P&gt;Exactly.&amp;nbsp; How can you bring in things which aren't text?&amp;nbsp; A checkbox could be a PDF object, or a picture.&amp;nbsp; At the end of the day, you could learn JavaScript in PDF and export it, or possibly find a third-party library to get it (perhaps, from a quick search: &lt;A href="https://stackoverflow.com/questions/55777812/python-pdf-how-to-read-from-form-elements-like-checkbox" target="_blank"&gt;https://stackoverflow.com/questions/55777812/python-pdf-how-to-read-from-form-elements-like-checkbox&lt;/A&gt;), but it's going to take a fair bit of effort.&amp;nbsp; Going back to the source data is really the best option.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Feb 2023 11:17:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-read-from-pdf-to-SAS/m-p/859163#M339472</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2023-02-16T11:17:54Z</dc:date>
    </item>
  </channel>
</rss>

