<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Controlling INPUT Pointer With DLM or DLMSTR in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772877#M245437</link>
    <description>&lt;P&gt;I'm trying to scrape webpages. The following minimum example collects the HREFs and titles of current Latest Post items from a SAS Support Communities homepage.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _null_;
	infile "https://communities.sas.com/" url truncover;
	input;
	_infile_=strip(tranwrd(_infile_,"09"x,""));
	if _infile_="&amp;lt;!-- TAB ONE BEGINS --&amp;gt;" then i+1;
	else if _infile_="&amp;lt;!-- TAB ONE ENDS --&amp;gt;" then i+-1;
	file "!userprofile\desktop\page.htm";
	if _infile_^="" &amp;amp; i then put _infile_;
run;

data page;
	infile "!userprofile\desktop\page.htm" dlm='"' truncover;
	input @;
	if _infile_=:'&amp;lt;a class="p' then do;
		input @'href="' href :$160.;
		input title :$160.;
	end;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I'd like to give more detailed directions as the data aren't readily available.&lt;/P&gt;</description>
    <pubDate>Thu, 07 Oct 2021 20:29:52 GMT</pubDate>
    <dc:creator>Junyong</dc:creator>
    <dc:date>2021-10-07T20:29:52Z</dc:date>
    <item>
      <title>Controlling INPUT Pointer With DLM or DLMSTR</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772848#M245423</link>
      <description>&lt;P&gt;Suppose I read the following data.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*SPAN is incorrectly read*/
data want1;
	infile cards dlm="'""" truncover;
	input @"name='" name :$16. @'value="' value :$16. @'title="' title :$32. @"&amp;lt;span&amp;gt;" span :$16. @'etc="' etc :$16.;
cards;
name='united states' value="123456" title="&amp;lt;b&amp;gt;results 2016&amp;lt;/b&amp;gt;" &amp;lt;span&amp;gt;new york&amp;lt;/span&amp;gt; etc="jimmy"
name='canada' value="12345" title="yearly statistics" &amp;lt;span&amp;gt;toronto&amp;lt;/span&amp;gt; etc="dawn"
name='japan' value="1234567" title="annual bulletin" &amp;lt;span&amp;gt;osaka&amp;lt;/span&amp;gt; etc="fujita"
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Then SPAN will be read incorrectly. The problem is that I cannot include &amp;lt; because some other variables include it.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*cannot include &amp;lt; as other variables contain it*/
data want2;
	infile cards dlm="'""&amp;lt;" truncover;
	input @"name='" name :$16. @'value="' value :$16. @'title="' title :$32. @"&amp;lt;span&amp;gt;" span :$16. @'etc="' etc :$16.;
cards;
name='united states' value="123456" title="&amp;lt;b&amp;gt;results 2016&amp;lt;/b&amp;gt;" &amp;lt;span&amp;gt;new york&amp;lt;/span&amp;gt; etc="jimmy"
name='canada' value="12345" title="yearly statistics" &amp;lt;span&amp;gt;toronto&amp;lt;/span&amp;gt; etc="dawn"
name='japan' value="1234567" title="annual bulletin" &amp;lt;span&amp;gt;osaka&amp;lt;/span&amp;gt; etc="fujita"
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thus, including &amp;lt; in DLM will affect TITLE. I then found DLMSTR, which can handle multi-character delimiters. The problem, however, is that it also affects the characters in DLM, so the results change completely, as shown below.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*DLMSTR worsens this issue*/
data want3;
	infile cards dlm="'""" dlmstr="&amp;lt;/span&amp;gt;" truncover;
	input @"name='" name :$16. @'value="' value :$16. @'title="' title :$32. @"&amp;lt;span&amp;gt;" span :$16. @'etc="' etc :$16.;
cards;
name='united states' value="123456" title="&amp;lt;b&amp;gt;results 2016&amp;lt;/b&amp;gt;" &amp;lt;span&amp;gt;new york&amp;lt;/span&amp;gt; etc="jimmy"
name='canada' value="12345" title="yearly statistics" &amp;lt;span&amp;gt;toronto&amp;lt;/span&amp;gt; etc="dawn"
name='japan' value="1234567" title="annual bulletin" &amp;lt;span&amp;gt;osaka&amp;lt;/span&amp;gt; etc="fujita"
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;In this case, can I control the ending point of SPAN via&amp;nbsp;@ so that I can correctly read the strings inside (new york, toronto, and osaka)?&lt;/P&gt;</description>
      <pubDate>Thu, 07 Oct 2021 18:44:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772848#M245423</guid>
      <dc:creator>Junyong</dc:creator>
      <dc:date>2021-10-07T18:44:43Z</dc:date>
    </item>
    <item>
      <title>Re: Controlling INPUT Pointer With DLM or DLMSTR</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772868#M245431</link>
      <description>&lt;P&gt;What file type is this coming from? Some of the stuff looks like it might come from XML, for which there is a LIBNAME engine that reads (many) forms.&lt;/P&gt;
&lt;P&gt;Or perhaps a website? Proc HTTP might be the way to go.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 07 Oct 2021 19:50:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772868#M245431</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2021-10-07T19:50:25Z</dc:date>
    </item>
    <item>
      <title>Re: Controlling INPUT Pointer With DLM or DLMSTR</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772877#M245437</link>
      <description>&lt;P&gt;I'm trying to scrape webpages. The following minimum example collects the HREFs and titles of current Latest Post items from a SAS Support Communities homepage.&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data _null_;
	infile "https://communities.sas.com/" url truncover;
	input;
	_infile_=strip(tranwrd(_infile_,"09"x,""));
	if _infile_="&amp;lt;!-- TAB ONE BEGINS --&amp;gt;" then i+1;
	else if _infile_="&amp;lt;!-- TAB ONE ENDS --&amp;gt;" then i+-1;
	file "!userprofile\desktop\page.htm";
	if _infile_^="" &amp;amp; i then put _infile_;
run;

data page;
	infile "!userprofile\desktop\page.htm" dlm='"' truncover;
	input @;
	if _infile_=:'&amp;lt;a class="p' then do;
		input @'href="' href :$160.;
		input title :$160.;
	end;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I'd like to give more detailed directions as the data aren't readily available.&lt;/P&gt;</description>
      <pubDate>Thu, 07 Oct 2021 20:29:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772877#M245437</guid>
      <dc:creator>Junyong</dc:creator>
      <dc:date>2021-10-07T20:29:52Z</dc:date>
    </item>
    <item>
      <title>Re: Controlling INPUT Pointer With DLM or DLMSTR</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772906#M245447</link>
      <description>&lt;P&gt;This works with your original example:&lt;/P&gt;
&lt;PRE&gt;data want1;
	infile cards dlm=" " dsd truncover;
	input @"name=" name :$16. @'value=' value :$16. @'title=' title :$32. @'etc=' etc :$16.;
   length span $ 16;
   startpos = index(_infile_,'&amp;lt;span&amp;gt;')+6;
   endpos   =  index(_infile_,'&amp;lt;/span&amp;gt;');
   span = substr(_infile_,startpos,endpos-startpos);
cards;
name='united states' value="123456" title="&amp;lt;b&amp;gt;results 2016&amp;lt;/b&amp;gt;" &amp;lt;span&amp;gt;new york&amp;lt;/span&amp;gt; etc="jimmy"
name='canada' value="12345" title="yearly statistics" &amp;lt;span&amp;gt;toronto&amp;lt;/span&amp;gt; etc="dawn"
name='japan' value="1234567" title="annual bulletin" &amp;lt;span&amp;gt;osaka&amp;lt;/span&amp;gt; etc="fujita"
;&lt;/PRE&gt;
&lt;P&gt;Note that I removed the quotes from the DLM. Values inside quotes will be read just fine. So I also removed the quote ending the&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/180732"&gt;@name&lt;/a&gt; strings. If you change the informat on Value to 16. it will be read as a number, if that is the desired result.&lt;/P&gt;
&lt;P&gt;The DSD option prevents the spaces inside the quoted strings from being used as delimiters.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Warning: you may want to specify a record length (LRECL=) larger than 256 for your infile URL. I think the URL access method may default to 256; at least the FILENAME statement defaults to that when the URL access method is used.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You would likely want to drop the Startpos and Endpos variables I created for finding the value of Span. Or move the Index function calls into the substr call.&lt;/P&gt;</description>
      <pubDate>Thu, 07 Oct 2021 23:12:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772906#M245447</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2021-10-07T23:12:22Z</dc:date>
    </item>
    <item>
      <title>Re: Controlling INPUT Pointer With DLM or DLMSTR</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772949#M245457</link>
      <description>&lt;P&gt;Another way:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data WANT;
  infile cards dlm="'""" truncover ;
  input @'name=' NAME $:20. @'value=' VALUE $:20. @'title=' TITLE $:20.  @'&amp;lt;span&amp;gt;' CITY $:20. ETC $:20. ;
  CITY=scan(CITY,1,'&amp;lt;');  
cards;
name='united states' value="123456" title="&amp;lt;b&amp;gt;results 2016&amp;lt;/b&amp;gt;" &amp;lt;span&amp;gt;new york&amp;lt;/span&amp;gt; etc="jimmy"
name='canada' value="12345" title="yearly statistics" &amp;lt;span&amp;gt;toronto&amp;lt;/span&amp;gt; etc="dawn"
name='japan' value="1234567" title="annual bulletin" &amp;lt;span&amp;gt;osaka&amp;lt;/span&amp;gt; etc="fujita"
;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 08 Oct 2021 07:40:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Controlling-INPUT-Pointer-With-DLM-or-DLMSTR/m-p/772949#M245457</guid>
      <dc:creator>ChrisNZ</dc:creator>
      <dc:date>2021-10-08T07:40:26Z</dc:date>
    </item>
  </channel>
</rss>

