<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Web scraping PROC HTTP:  NOTE: 404 Not Found in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879764#M347561</link>
    <description>&lt;P&gt;&lt;A href="https://blogs.sas.com/content/sasdummy/2018/01/23/check-json-and-http/" target="_self"&gt;Here are some tips for checking that PROC HTTP is usable&lt;/A&gt; with your internet access in your SAS session.&lt;/P&gt;</description>
    <pubDate>Thu, 08 Jun 2023 13:35:20 GMT</pubDate>
    <dc:creator>ChrisHemedinger</dc:creator>
    <dc:date>2023-06-08T13:35:20Z</dc:date>
    <item>
      <title>Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879734#M347552</link>
      <description>&lt;P&gt;Hi experts. I'm using the following example:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;filename src temp;
proc http
url="https://www.sas.com/en/whitepapers/artificial-intelligence-banking-risk-management-110277.br.html#formsuccess?utm_source=linkedin&amp;amp;utm_medium=paid-social&amp;amp;utm_campaign=rsk-gen-emea&amp;amp;utm_content=50931-lklgf-english"
out=src;
run;

&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;The scraping produces the following note:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;PRE&gt;&lt;CODE class=""&gt;73 filename src temp;
74 proc http
75 method="get"
76 url="https://www.sas.com/en/whitepapers/artificial-intelligence-banking-risk-management-110277.br.html#formsuccess?utm_so
76 ! urce=linkedin&amp;amp;utm_medium=paid-social&amp;amp;utm_campaign=rsk-gen-emea&amp;amp;utm_content=50931-lklgf-english"
WARNING: Apparent symbolic reference UTM_MEDIUM not resolved.
WARNING: Apparent symbolic reference UTM_CAMPAIGN not resolved.
WARNING: Apparent symbolic reference UTM_CONTENT not resolved.
77 out=src;
78 run;

NOTE: 404 Not Found
NOTE: PROCEDURE HTTP ha utilizado (Tiempo de proceso total):
real time 0.11 seconds
cpu time&lt;/CODE&gt;&lt;/PRE&gt;However, the URL is reachable and it has an HTML structure that should be possible to scrape&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;Any idea about why the scraping is failing here?&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 11:43:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879734#M347552</guid>
      <dc:creator>dcortell</dc:creator>
      <dc:date>2023-06-08T11:43:07Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879738#M347554</link>
      <description>&lt;P&gt;Try to use single quotes for the url option, unless you are actually trying to use SAS macro variables.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 12:05:56 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879738#M347554</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2023-06-08T12:05:56Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879749#M347556</link>
      <description>&lt;P&gt;I suspect that the webserver that SAS is using to host that page is not playing well with PROC HTTP.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Since it is a SAS site, why not open a SAS Support ticket and let them debug their own site?&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 12:44:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879749#M347556</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2023-06-08T12:44:53Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879756#M347557</link>
      <description>&lt;P&gt;I suspect your environment is working through a proxy server and you might need to specify PROXY= options.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="sas"&gt;filename resp temp;
 
proc http
 url="https://www.sas.com/en/whitepapers/artificial-intelligence-banking-risk-management-110277.br.html"
 out=resp;
run;

data _null_;
 infile resp;
 input ;
 put _infile_ ;
run;&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;But what are you trying to do here? This page is a gate to a whitepaper, but it won't get you the paper itself -- that's a download process with a PDF.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:15:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879756#M347557</guid>
      <dc:creator>ChrisHemedinger</dc:creator>
      <dc:date>2023-06-08T13:15:22Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879761#M347559</link>
      <description>&lt;P&gt;It works for me from SAS Viya Learning Ed:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;82   filename src temp;
83   proc http
84   url='https://www.sas.com/en/whitepapers/artificial-intelligence-banking-risk-management-110277.br.html#formsuccess?utm_source=l
84 ! inkedin&amp;amp;utm_medium=paid-social&amp;amp;utm_campaign=rsk-gen-emea&amp;amp;utm_content=50931-lklgf-english'
85   out=src;
86   run;
NOTE: PROCEDURE HTTP used (Total process time):
      real time           0.42 seconds
      cpu time            0.03 seconds&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:20:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879761#M347559</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2023-06-08T13:20:43Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879763#M347560</link>
      <description>&lt;P&gt;Hi Chris, I'm testing PROC HTTP under different conditions.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:32:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879763#M347560</guid>
      <dc:creator>dcortell</dc:creator>
      <dc:date>2023-06-08T13:32:40Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879764#M347561</link>
      <description>&lt;P&gt;&lt;A href="https://blogs.sas.com/content/sasdummy/2018/01/23/check-json-and-http/" target="_self"&gt;Here are some tips for checking that PROC HTTP is usable&lt;/A&gt; with your internet access in your SAS session.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:35:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879764#M347561</guid>
      <dc:creator>ChrisHemedinger</dc:creator>
      <dc:date>2023-06-08T13:35:20Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879767#M347563</link>
      <description>&lt;P&gt;Running the test code, I get the following error:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;92 /* Tell SAS to parse the JSON response */&lt;/DIV&gt;&lt;DIV class=""&gt;93 libname stream JSON fileref=resp;&lt;/DIV&gt;&lt;DIV class=""&gt;NOTE: JSON data is only read once. To read the JSON again, reassign the JSON LIBNAME.&lt;/DIV&gt;&lt;DIV class=""&gt;ERROR: JSON no válido en input cerca de la línea 1 columna 1: Encountered an illegal character.&lt;/DIV&gt;&lt;DIV class=""&gt;ERROR: Error in the LIBNAME statement.&lt;/DIV&gt;&lt;DIV class=""&gt;94&lt;/DIV&gt;&lt;DIV class=""&gt;95 title "JSON library structure";&lt;/DIV&gt;&lt;DIV class=""&gt;96 proc datasets lib=stream;&lt;/DIV&gt;&lt;DIV class=""&gt;ERROR: Libref STREAM is not assigned.&lt;/DIV&gt;&lt;DIV class=""&gt;97 quit;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:50:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879767#M347563</guid>
      <dc:creator>dcortell</dc:creator>
      <dc:date>2023-06-08T13:50:29Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879769#M347564</link>
      <description>&lt;P&gt;EDIT: After running it a second time, the code works fine and no error is generated.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 13:53:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879769#M347564</guid>
      <dc:creator>dcortell</dc:creator>
      <dc:date>2023-06-08T13:53:45Z</dc:date>
    </item>
    <item>
      <title>Re: Web scraping PROC HTTP:  NOTE: 404 Not Found</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879776#M347567</link>
      <description>&lt;P&gt;In addition to Chris's test,&amp;nbsp; &amp;nbsp;if I cut the URL at the ".html" part and provide it, the scrape works fine:&lt;/P&gt;
&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%let url='https://www.sas.com/en/whitepapers/artificial-intelligence-banking-risk-management-110277.br.html';

filename src temp;
proc http
url=&amp;amp;url
out=src;
run;

data _null_;
infile src;
input;
list;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;
&lt;P class=""&gt;I believe it could then make sense simply to trim similar URLs at the ".html" part, if that allows the scraping, but I am still not sure why the full URL scrape works fine for some folks but not for me.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Jun 2023 15:19:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Web-scraping-PROC-HTTP-NOTE-404-Not-Found/m-p/879776#M347567</guid>
      <dc:creator>dcortell</dc:creator>
      <dc:date>2023-06-08T15:19:18Z</dc:date>
    </item>
  </channel>
</rss>

