<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Reading pdf files on SAS Viya 3.5 in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Reading-pdf-files-on-SAS-Viya-3-5/m-p/942218#M369491</link>
    <description>&lt;P&gt;When I upload my PDFs to a caslib via SFTP, it works with the following code.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;However, it raises the following problem note; I think it should work once the admin resolves it.&amp;nbsp;&lt;/P&gt;
&lt;H2&gt;Problem Note&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;I&gt;69063:&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/I&gt;"ERROR: Failed to initialize a Java virtual machine in TKJNL" occurs with SAS® Cloud Analytic Services (CAS) actions that use Java&lt;/H2&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc cas ;
session mySession;
   table.dropCaslib / caslib='_TMPCAS_' quiet=true; 
   table.dropCaslib / caslib='_LOADTMP' quiet=true; 
run;

/*** Macro variable setup ***/
/* Specify file path to your images (such as the giraffe_dolphin_small example data) */
%let imagePath = /caslibs/akaike/my_pdf/;

/* Specify the caslib and table name for your image data table */
%let imageCaslibName = casuser;
%let imageTableName = images;

/* Specify the caslib and table name for the augmented training image data table */
%let imageTrainingCaslibName = &amp;amp;imageCaslibName;
%let imageTrainingTableName = &amp;amp;imageTableName.Augmented;


proc cas;
   file log;
   table.dropCaslib /
   caslib='loadPDFTempCaslib' quiet = true;
 run;


/*** Load and display images ***/ 
/* Create temporary caslib and libref for loading images */ 
caslib loadPDFTempCaslib datasource=(srctype="path") path="&amp;amp;imagePath"
    subdirs notactive sessref=mySession;
 
libname _loadtmp cas caslib="loadPDFTempCaslib"; 
libname _tmpcas_ cas caslib="CASUSER"; 

proc casutil;
	list files incaslib='loadPDFTempCaslib'; 
quit;


proc casutil;
    load casdata=''                                                              /* To read in all files use an empty string. For a single file specify the file name */
         incaslib='loadPDFTempCaslib'                                            /* The location of the files to load */
         importoptions=(fileType="document" fileExtList = 'PDF' tikaConv=True)   /* Specify document import options   */
         casout='pdf_data' outcaslib='casuser' replace;                          /* Specify the output cas table info */
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 02 Sep 2024 16:11:49 GMT</pubDate>
    <dc:creator>acordes</dc:creator>
    <dc:date>2024-09-02T16:11:49Z</dc:date>
    <item>
      <title>Reading pdf files on SAS Viya 3.5</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Reading-pdf-files-on-SAS-Viya-3-5/m-p/942209#M369486</link>
      <description>&lt;P&gt;I am following this blog post&amp;nbsp;&lt;A title="Extract text from a PDF file using SAS Viya" href="https://blogs.sas.com/content/sgf/2023/11/08/extract-text-from-a-pdf-file-using-sas-viya/" target="_self"&gt;https://blogs.sas.com/content/sgf/2023/11/08/extract-text-from-a-pdf-file-using-sas-viya/&lt;/A&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;but it is not working for me.&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/48835"&gt;@pstyliadis&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="pic.png" style="width: 999px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/99882i41B453032A26AB93/image-size/large?v=v2&amp;amp;px=999" role="button" title="pic.png" alt="pic.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Running this program&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%let path = /Projects/Extract text from PDFs and create tables/mypdfs;
 
%put &amp;amp;=path;


proc cas;
   file log;
   table.dropCaslib /
   caslib='ac_pdf' quiet = true;
 run;

proc cas;
   session mySession;

   table.addCaslib /
     caslib="ac_pdf"
     description="pdf files"
     dataSource={srctype="path"}
     path="&amp;amp;path" subdirs=true ;
run;


proc casutil;
	list files incaslib='ac_pdf'; 
quit;

proc casutil;
    load casdata=''                                                              /* To read in all files use an empty string. For a single file specify the file name */
         incaslib='ac_pdf'                                                       /* The location of the files to load */
         importoptions=(fileType="document" fileExtList = 'PDF' tikaConv=True)   /* Specify document import options   */
         casout='pdf_data' outcaslib='casuser' replace;                          /* Specify the output cas table info */
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;I get the following errors in the log:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV class="sasSource"&gt;1 %studio_hide_wrapper;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;83 %let path = /Projects/Extract text from PDFs and create tables/mypdfs;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;84&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;85 %put &amp;amp;=path;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;PATH=/Projects/Extract text from PDFs and create tables/mypdfs&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;86&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;87&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;88 proc cas;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;89 file log;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;90 table.dropCaslib /&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;91 caslib='ac_pdf' quiet = true;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;92 run;&lt;/DIV&gt;
&lt;DIV id="sasLogNote1_1725283858582" class="sasNote"&gt;NOTE: Active Session now MYSESSION.&lt;/DIV&gt;
&lt;DIV id="sasLogNote2_1725283858582" class="sasNote"&gt;NOTE: 'CASUSER(DKXEVO0)' is now the active caslib.&lt;/DIV&gt;
&lt;DIV id="sasLogNote3_1725283858582" class="sasNote"&gt;NOTE: Cloud Analytic Services removed the caslib 'ac_pdf'.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;93&lt;/DIV&gt;
&lt;DIV id="sasLogNote4_1725283858582" class="sasNote"&gt;NOTE: PROCEDURE CAS used (Total process time):&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;real time 0.01 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;cpu time 0.02 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;94 proc cas;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;95 session mySession;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;96&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;97 table.addCaslib /&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;98 caslib="ac_pdf"&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;99 description="pdf files"&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;100 dataSource={srctype="path"}&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;101 path="&amp;amp;path" subdirs=true ;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;102 run;&lt;/DIV&gt;
&lt;DIV id="sasLogNote5_1725283858582" class="sasNote"&gt;NOTE: Active Session now mySession.&lt;/DIV&gt;
&lt;DIV id="sasLogNote6_1725283858582" class="sasNote"&gt;NOTE: Failed to resolve path /Projects/Extract text from PDFs and create tables/mypdfs/ for caslib ac_pdf.&lt;/DIV&gt;
&lt;DIV id="sasLogNote7_1725283858582" class="sasNote"&gt;NOTE: 'ac_pdf' is now the active caslib.&lt;/DIV&gt;
&lt;DIV id="sasLogNote8_1725283858582" class="sasNote"&gt;NOTE: Cloud Analytic Services added the caslib 'ac_pdf'.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;103&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;104&lt;/DIV&gt;
&lt;DIV id="sasLogNote9_1725283858582" class="sasNote"&gt;NOTE: The PROCEDURE CAS printed page 5.&lt;/DIV&gt;
&lt;DIV id="sasLogNote10_1725283858582" class="sasNote"&gt;NOTE: PROCEDURE CAS used (Total process time):&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;real time 0.02 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;cpu time 0.05 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;105 proc casutil;&lt;/DIV&gt;
&lt;DIV id="sasLogNote11_1725283858582" class="sasNote"&gt;NOTE: The UUID '668012bb-0288-034f-9927-9c76fa3a3263' is connected using session MYSESSION.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;106&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;106! list files incaslib='ac_pdf';&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Caslib Information&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Library ac_pdf&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Source Type PATH&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Description pdf files&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Path /Projects/Extract text from PDFs and create tables/mypdfs/&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Session local Yes&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Active Yes&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Personal No&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Hidden No&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;Transient No&lt;/DIV&gt;
&lt;DIV id="sasLogError1_1725283858582" class="sasError"&gt;ERROR: The file or path '/Projects/Extract text from PDFs and create tables/mypdfs' is not available in the file system.&lt;/DIV&gt;
&lt;DIV id="sasLogError2_1725283858582" class="sasError"&gt;ERROR: The action stopped due to errors.&lt;/DIV&gt;
&lt;DIV id="sasLogNote12_1725283858582" class="sasNote"&gt;NOTE: Cloud Analytic Services processed the combined requests in 0.001124 seconds.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;107 quit;&lt;/DIV&gt;
&lt;DIV id="sasLogNote13_1725283858582" class="sasNote"&gt;NOTE: PROCEDURE CASUTIL used (Total process time):&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;real time 0.02 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;cpu time 0.03 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;108&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;109 proc casutil;&lt;/DIV&gt;
&lt;DIV id="sasLogNote14_1725283858582" class="sasNote"&gt;NOTE: The UUID '668012bb-0288-034f-9927-9c76fa3a3263' is connected using session MYSESSION.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;110 load casdata='' /* To read in all files use an empty string.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;110! For a single file specify the file name */&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;111 incaslib='ac_pdf' /* The location of the files to load */&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;112 importoptions=(fileType="document" fileExtList = 'PDF' tikaConv=True) /* Specify document import options */&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;113 casout='pdf_data' outcaslib='casuser' replace; /* Specify the output cas table info */&lt;/DIV&gt;
&lt;DIV id="sasLogError3_1725283858582" class="sasError"&gt;ERROR: When loading a document table the path value must be a directory. You can specify path="" to load documents from the root&lt;/DIV&gt;
&lt;DIV class="sasError"&gt;directory of the caslib. You can specify common file name extensions in the fileExtList parameter to restrict the documents&lt;/DIV&gt;
&lt;DIV class="sasError"&gt;to load.&lt;/DIV&gt;
&lt;DIV id="sasLogError4_1725283858582" class="sasError"&gt;ERROR: The action stopped due to errors.&lt;/DIV&gt;
&lt;DIV id="sasLogNote15_1725283858582" class="sasNote"&gt;NOTE: The Cloud Analytic Services server processed the request in 0.000582 seconds.&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;114 quit;&lt;/DIV&gt;
&lt;DIV id="sasLogNote16_1725283858582" class="sasNote"&gt;NOTE: The SAS System stopped processing this step because of errors.&lt;/DIV&gt;
&lt;DIV id="sasLogNote17_1725283858582" class="sasNote"&gt;NOTE: PROCEDURE CASUTIL used (Total process time):&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;real time 0.00 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;cpu time 0.01 seconds&lt;/DIV&gt;
&lt;DIV class="sasNote"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;115&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;116 %studio_hide_wrapper;&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;127&lt;/DIV&gt;
&lt;DIV class="sasSource"&gt;128&lt;/DIV&gt;
&lt;P&gt;&lt;LI-WRAPPER&gt;&lt;/LI-WRAPPER&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 02 Sep 2024 13:36:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Reading-pdf-files-on-SAS-Viya-3-5/m-p/942209#M369486</guid>
      <dc:creator>acordes</dc:creator>
      <dc:date>2024-09-02T13:36:17Z</dc:date>
    </item>
    <item>
      <title>Re: Reading pdf files on SAS Viya 3.5</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Reading-pdf-files-on-SAS-Viya-3-5/m-p/942218#M369491</link>
      <description>&lt;P&gt;When I upload my PDFs to a caslib via SFTP, it works with the following code.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;However, it raises the following problem note; I think it should work once the admin resolves it.&amp;nbsp;&lt;/P&gt;
&lt;H2&gt;Problem Note&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;I&gt;69063:&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/I&gt;"ERROR: Failed to initialize a Java virtual machine in TKJNL" occurs with SAS® Cloud Analytic Services (CAS) actions that use Java&lt;/H2&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;
proc cas ;
session mySession;
   table.dropCaslib / caslib='_TMPCAS_' quiet=true; 
   table.dropCaslib / caslib='_LOADTMP' quiet=true; 
run;

/*** Macro variable setup ***/
/* Specify file path to your images (such as the giraffe_dolphin_small example data) */
%let imagePath = /caslibs/akaike/my_pdf/;

/* Specify the caslib and table name for your image data table */
%let imageCaslibName = casuser;
%let imageTableName = images;

/* Specify the caslib and table name for the augmented training image data table */
%let imageTrainingCaslibName = &amp;amp;imageCaslibName;
%let imageTrainingTableName = &amp;amp;imageTableName.Augmented;


proc cas;
   file log;
   table.dropCaslib /
   caslib='loadPDFTempCaslib' quiet = true;
 run;


/*** Load and display images ***/ 
/* Create temporary caslib and libref for loading images */ 
caslib loadPDFTempCaslib datasource=(srctype="path") path="&amp;amp;imagePath"
    subdirs notactive sessref=mySession;
 
libname _loadtmp cas caslib="loadPDFTempCaslib"; 
libname _tmpcas_ cas caslib="CASUSER"; 

proc casutil;
	list files incaslib='loadPDFTempCaslib'; 
quit;


proc casutil;
    load casdata=''                                                              /* To read in all files use an empty string. For a single file specify the file name */
         incaslib='loadPDFTempCaslib'                                            /* The location of the files to load */
         importoptions=(fileType="document" fileExtList = 'PDF' tikaConv=True)   /* Specify document import options   */
         casout='pdf_data' outcaslib='casuser' replace;                          /* Specify the output cas table info */
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 02 Sep 2024 16:11:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Reading-pdf-files-on-SAS-Viya-3-5/m-p/942218#M369491</guid>
      <dc:creator>acordes</dc:creator>
      <dc:date>2024-09-02T16:11:49Z</dc:date>
    </item>
  </channel>
</rss>

