<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: SAS access Google/Yahoo search engine to colect url lists in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113977#M259214</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Tiffany,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Can you please post when you receive a successful outcome?&amp;nbsp; I could be very interested to see how things progress.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;Scott&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Sun, 18 Aug 2013 10:40:22 GMT</pubDate>
    <dc:creator>Scott_Mitchell</dc:creator>
    <dc:date>2013-08-18T10:40:22Z</dc:date>
    <item>
      <title>SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113970#M259207</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear SAS support communities,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried to use base SAS to collect url lists from search engine but failed. If I have a keyword "SAS+training" url "&lt;A href="http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining"&gt;http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining&lt;/A&gt;", can I collect the result links?&amp;nbsp; It'll be very helpful especially if I have more than 100 keywords to search.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you in advance!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Tiffany&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 03:36:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113970#M259207</guid>
      <dc:creator>Tiffany1</dc:creator>
      <dc:date>2013-08-17T03:36:15Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113971#M259208</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;What did you try?&lt;/P&gt;&lt;P&gt;What were the issues?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Combining:&lt;/P&gt;&lt;P&gt;&lt;A href="http://support.sas.com/documentation/cdl/en/lestmtsref/65040/HTML/default/viewer.htm#p103pi2vrzn6qhn1e8alrs01jrb7.htm" title="http://support.sas.com/documentation/cdl/en/lestmtsref/65040/HTML/default/viewer.htm#p103pi2vrzn6qhn1e8alrs01jrb7.htm"&gt;SAS(R) 9.4 Statements: Reference&lt;/A&gt; ( FILENAME Statement, URL Access Method )&lt;/P&gt;&lt;P&gt;&lt;A href="http://support.sas.com/documentation/cdl/en/lestmtsref/65040/HTML/default/viewer.htm#n1rill4udj0tfun1fvce3j401plo.htm" title="http://support.sas.com/documentation/cdl/en/lestmtsref/65040/HTML/default/viewer.htm#n1rill4udj0tfun1fvce3j401plo.htm"&gt;SAS(R) 9.4 Statements: Reference&lt;/A&gt; ( input statement&amp;nbsp; example 5 multiple files)&lt;/P&gt;&lt;P&gt;Should give an approach.&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Macros to the rescue&lt;BR /&gt; &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 09:12:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113971#M259208</guid>
      <dc:creator>jakarman</dc:creator>
      <dc:date>2013-08-17T09:12:33Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113972#M259209</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;My purpose is to collect the result links after searching a keyword from google instead of clicking and browsing. I can successfully collect data in a static website like example 3 below.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;filename foo url &lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&amp;nbsp; '&lt;/SPAN&gt;&lt;A class="jive-link-external-small" href="http://support.sas.com/techsup/service_intro.html"&gt;http://support.sas.com/techsup/service_intro.html&lt;/A&gt;';&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;data _null_;&lt;/P&gt;&lt;P&gt;&amp;nbsp; infile foo length=len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input record $varying200. len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; put record $varying200. len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; if _n_=15 then stop;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;However, if I put a google search page after url, I can't get the result links.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;filename foo url &lt;/P&gt;&lt;P&gt;"&lt;A class="jive-link-external-small" href="http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining"&gt;http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining&lt;/A&gt;" ;&lt;/P&gt;&lt;P&gt;;&amp;nbsp; &lt;/P&gt;&lt;P&gt;data _null_;&lt;/P&gt;&lt;P&gt;&amp;nbsp; infile foo length=len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input record $varying200. len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; put record $varying200. len;&lt;/P&gt;&lt;P&gt;&amp;nbsp; if _n_=15 then stop;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you for your help.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 09:50:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113972#M259209</guid>
      <dc:creator>Tiffany1</dc:creator>
      <dc:date>2013-08-17T09:50:16Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113973#M259210</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Tiffany,&amp;nbsp; am I seeing ' (single ) and " (double) being used as char?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The single ' usage will keep text untouched by the sas-macro interpreter. &lt;/P&gt;&lt;P&gt;The double " usage will let the sas-macro interpreter change the string as it likes.&amp;nbsp; Using &amp;amp;-chars and %chars (possibly more) .&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;Changing from " to ' should help.&amp;nbsp;&amp;nbsp;&amp;nbsp; For simple testing the stream single name... (untested)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;filename foo url&amp;nbsp; '&lt;A class="jive-link-external-small" href="http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining"&gt;http://www.google.co.jp/?gws_rd=cr#bav=on.2,or.&amp;amp;fp=4d59ced19cf73e3&amp;amp;q=sas%2Btraining&lt;/A&gt;' ;&lt;/P&gt;&lt;P&gt;data _null_ ;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; infile foo&amp;nbsp; length=len lrecl=? ; /* check lrecl setting as the maximum length is often 255 */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; input;&amp;nbsp; put _infile_ ;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* _infile_ is an automatic variable containing the whole record */&lt;/P&gt;&lt;P&gt;run ;&amp;nbsp; &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 11:04:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113973#M259210</guid>
      <dc:creator>jakarman</dc:creator>
      <dc:date>2013-08-17T11:04:30Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113974#M259211</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Jaap,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I got 4 observations which are not related to the search result. The data looks like the homepage of Google. I couldn't find any "SAS" or "training" in the output. I tried Yahoo which is the same....&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Your help will be greatly appreciated.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 16:11:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113974#M259211</guid>
      <dc:creator>Tiffany1</dc:creator>
      <dc:date>2013-08-17T16:11:04Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113975#M259212</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;The good thing is you are getting a response.&amp;nbsp;&amp;nbsp; Japanese domain of google.&lt;/P&gt;&lt;P&gt;How to build the web address line is documented at google. You look to have copied one generated by a web-page. the %2B is the + sign replacement probably done by the browser.&lt;/P&gt;&lt;P&gt;&lt;A href="https://developers.google.com/search-appliance/documentation/46/xml_reference#request_format" title="https://developers.google.com/search-appliance/documentation/46/xml_reference#request_format"&gt;Search Protocol Reference&lt;/A&gt; (google http:\\&amp;nbsp; developers.google.com/search-appliance/documentation/46/xml_reference#request_format ) I crashed the address to have it shown&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Are you sure you have changed the quotes to the lowercase one, the single ' (US keyboard layout)?&lt;/P&gt;&lt;P&gt;Did you increase the reclen to 32767? Just 4 records, there must be a lot more data.&lt;/P&gt;&lt;P&gt;-- http does not know/use the common record length of Windows/Unix; it is using css and html, possibly with some scripting being used for layout. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Message was edited by: Jaap Karman (added reclen note)&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 17 Aug 2013 17:22:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113975#M259212</guid>
      <dc:creator>jakarman</dc:creator>
      <dc:date>2013-08-17T17:22:25Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113976#M259213</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Jaap, thank you so much. It's helpful!&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 18 Aug 2013 00:31:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113976#M259213</guid>
      <dc:creator>Tiffany1</dc:creator>
      <dc:date>2013-08-18T00:31:16Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113977#M259214</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Tiffany,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Can you please post when you receive a successful outcome?&amp;nbsp; I could be very interested to see how things progress.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;Scott&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sun, 18 Aug 2013 10:40:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113977#M259214</guid>
      <dc:creator>Scott_Mitchell</dc:creator>
      <dc:date>2013-08-18T10:40:22Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113978#M259215</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;And scott went on.... &lt;A _jive_internal="true" href="https://communities.sas.com/message/178205#178205"&gt;https://communities.sas.com/message/178205#178205&lt;/A&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 21 Aug 2013 05:09:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113978#M259215</guid>
      <dc:creator>jakarman</dc:creator>
      <dc:date>2013-08-21T05:09:19Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113979#M259216</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi Scott and Jaap,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I can extract data from a webpage but still fail to extract data from search engines like google and yahoo.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 22 Aug 2013 01:01:06 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113979#M259216</guid>
      <dc:creator>Tiffany1</dc:creator>
      <dc:date>2013-08-22T01:01:06Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113980#M259217</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Even though search engines may be the largest web scraper applications, they do not take too kindly to being scraped themselves.&amp;nbsp; Recommended reading:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;A class="active_link" href="http://google-scraper.squabbel.com/" title="http://google-scraper.squabbel.com/"&gt;Scraping Google for Fun and Profit&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The process followed by this site could be replicated, without major headaches, in SAS. To move you along the path without any of the recommended items for using proxies, your url should resemble the following:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;filename i url '&lt;/SPAN&gt;&lt;A class="jive-link-external-small" href="http://www.google.com/search?q=sas+training&amp;amp;ie=utf-8&amp;amp;as_qdr=all&amp;amp;aq=t&amp;amp;rls=org:mozilla:us:official&amp;amp;client=firefox&amp;amp;num=100"&gt;http://www.google.com/search?q=sas+training&amp;amp;ie=utf-8&amp;amp;as_qdr=all&amp;amp;aq=t&amp;amp;rls=org:mozilla:us:official&amp;amp;client=firefox&amp;amp;num=100&lt;/A&gt;';&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It is the addition of the browser information to the GET method which will return actual results rather than being interrupted by google's first minor layer of protection from scraping.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 22 Aug 2013 04:07:13 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113980#M259217</guid>
      <dc:creator>FriedEgg</dc:creator>
      <dc:date>2013-08-22T04:07:13Z</dc:date>
    </item>
    <item>
      <title>Re: SAS access Google/Yahoo search engine to colect url lists</title>
      <link>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113981#M259218</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;FriedEgg has a good argument.&amp;nbsp; Going to Spam or Scam with this kind of facilities will bring nothing.&lt;/P&gt;&lt;P&gt;At the same time his link is telling it should be technically possible to build that kind of functionality.&amp;nbsp;&amp;nbsp; &lt;BR /&gt;As long as you stay at normal behavior with a normal usage like a real person it should be able to get to work.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I wish I had a SAS installation (privately) available to investigate as having no access to SAS at the moment.&lt;/P&gt;&lt;P&gt;Always found it too expensive for home usage spending that much time with it at work.&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 22 Aug 2013 07:23:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/SAS-access-Google-Yahoo-search-engine-to-colect-url-lists/m-p/113981#M259218</guid>
      <dc:creator>jakarman</dc:creator>
      <dc:date>2013-08-22T07:23:58Z</dc:date>
    </item>
  </channel>
</rss>

