<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Subsetting data- slowly slimming down a base file in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131375#M35720</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Just a slight change in how you approach the problem:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;data poboxes &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt; ruralroutes&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt; oneswithoutspaces&lt;/SPAN&gt;&lt;/SPAN&gt;;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp; set addresses;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp; if some conditions then do;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output poboxes;;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: 
#ffffff;"&gt;&lt;SPAN style="font-size: 10pt; line-height: 1.5em;"&gt;&amp;nbsp; else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt; line-height: 1.5em;"&gt;some conditions then do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output ruralroutes;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;&amp;nbsp; else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt;"&gt;some conditions then do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: 
#ffffff;"&gt;oneswithoutspaces&lt;/SPAN&gt;;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Fri, 08 Mar 2013 18:09:24 GMT</pubDate>
    <dc:creator>art297</dc:creator>
    <dc:date>2013-03-08T18:09:24Z</dc:date>
    <item>
      <title>Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131374#M35719</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I am doing some basic data cleaning- addresses.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have a base file with all the observations.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I can write little code blocks that will peel off a subset of the data that meet this or that specific condition- this block deals with PO boxes, that block deals with Rural Routes, that block attacks things without spaces, etc.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;So what I have now is:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data poboxes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set addresses;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data ruralroutes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set addresses;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data oneswithoutspaces;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set addresses;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;etc etc&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;&lt;/P&gt;&lt;P&gt;what I want is to be able to break off this little part, deal with it, and then break off the next little part FROM WHAT IS LEFT and deal with that, and then break off another part FROM WHAT IS LEFT AFTER THAT, etc all the way down.&amp;nbsp; Creating an ever slimming "base" file.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;More like:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data poboxes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set addresses;&lt;/P&gt;&lt;P 
style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data ruralroutes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set addresses minus the stuff I took out and put in poboxes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data oneswithoutspaces;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;set (addresses minus the stuff I took out and put in poboxes) minus the stuff I took out and put in ruralroutes;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;if some conditions;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;data manipulation;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;etc etc&lt;/P&gt;&lt;P style="padding-left: 60px;"&gt;&lt;/P&gt;&lt;P&gt;But I don't know the most elegant/best practice in coding structure to do this.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Suggestions?&amp;nbsp; Please?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Mike&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 08 Mar 2013 17:35:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131374#M35719</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2013-03-08T17:35:58Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131375#M35720</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Just a slight change in how you approach the problem:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;data poboxes &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt; ruralroutes&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt; oneswithoutspaces&lt;/SPAN&gt;&lt;/SPAN&gt;;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp; set addresses;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp; if some conditions then do;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output poboxes;;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: 
#ffffff;"&gt;&lt;SPAN style="font-size: 10pt; line-height: 1.5em;"&gt;&amp;nbsp; else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt; line-height: 1.5em;"&gt;some conditions then do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output ruralroutes;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;&amp;nbsp; else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt;"&gt;some conditions then do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: 
#ffffff;"&gt;oneswithoutspaces&lt;/SPAN&gt;;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 08 Mar 2013 18:09:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131375#M35720</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2013-03-08T18:09:24Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131376#M35721</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;addresses data&amp;nbsp; is really hard to clean;&lt;/P&gt;&lt;P&gt;currently, i do just try to scan first one or two words as key words write your if condition; hopefully these help&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;e.g.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data poboxes;&lt;/P&gt;&lt;P&gt;set addresses;&lt;/P&gt;&lt;P&gt;if scan(address,1) in ('PO', 'POBOX', 'PBOX') THEN&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; DO;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ADDRSS = 'POBOX'||' '||SCAN(ADDRESS,-1) ;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; END;&lt;/P&gt;&lt;P&gt;ELSE IF .....THEN DO;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ....&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; END;&lt;/P&gt;&lt;P&gt;RUN;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 08 Mar 2013 20:32:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131376#M35721</guid>
      <dc:creator>Hao</dc:creator>
      <dc:date>2013-03-08T20:32:55Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131377#M35722</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;That did it!.&amp;nbsp; Thanks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The "if..ends" and "else if ...ends" do peel away the small groups I want to work with. &lt;/P&gt;&lt;P&gt;I just had to figure out the syntax to leave me with the leftovers, as it were.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;if some conditions then &lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;do;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output poboxes;;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt;"&gt;some conditions then 
&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output ruralroutes;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;&amp;nbsp; &lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;else if &lt;/SPAN&gt;&lt;SPAN style="font-size: 10pt;"&gt;some conditions then &lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&lt;SPAN style="font-size: 10pt;"&gt;do;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica 
Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;oneswithoutspaces&lt;/SPAN&gt;s;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; else&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do;&lt;/P&gt;&lt;P style="padding: 0px 0px 0px 60px; font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data manipulation;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 
output everythingelse;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue',Helvetica,Arial,'Lucida Grande',sans-serif; background-color: #ffffff;"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 08 Mar 2013 22:27:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131377#M35722</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2013-03-08T22:27:57Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131378#M35723</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I agree.&amp;nbsp; I think the data will never clean 100 percent on SCAN, TRANWRD, STRIP, SUBSTR, etc alone.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have to match a target file to a reference file.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My plan is to auto clean down the relatively easy stuff (upper-lower case, PO boxes, rural routes), do all the exact matches first, then fuzzy match (using compged), and then hand clean.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If I can get the match rate in the high 90's percent wise with less than 100 hand cleanings required I will call it a success.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I'm starting with at this point no more than 150,000 records in the target file..&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 08 Mar 2013 22:34:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131378#M35723</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2013-03-08T22:34:44Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131379#M35724</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi, HB&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I've done this sort of thing a couple of times. Your idea is sound, it'll just come down to how messy the data is.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;One very good tool for this kind of thing is the PRX suite of functions, to use Regular Expressions. The good news is, they are INCREDIBLY powerful and flexible. The bad news is, they can be INCREDIBLY difficult to figure out exactly what you want, and to get them working properly.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Good luck!&lt;/P&gt;&lt;P&gt;&amp;nbsp; Tom&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Sat, 09 Mar 2013 15:35:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131379#M35724</guid>
      <dc:creator>TomKari</dc:creator>
      <dc:date>2013-03-09T15:35:33Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131380#M35725</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thanks for the encouragement.&amp;nbsp; I have used regular expressions before in ASP/VBscript on unrelated stuff and I may end up trying to use them again.&amp;nbsp; I agree with your assessment.&amp;nbsp; I liken them to a good sharp knife- an excellent tool but quite capable of slicing your leg if you aren't paying attention.&amp;nbsp; I actually did that- 4 staples.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Addresses seem to have a few more rules than names (I think names are awful).&amp;nbsp; Most addresses start with a number, for example, and I'm hoping I can work with stuff like that.&amp;nbsp; I don't actually have my reference file for matching yet, so I'm really just sort of messing around with ground work and a dummy match file prior to getting in the game.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My next step is to search the Net for somebody else's work I can steal/borrow from. This task absolutely has been done before, the question is has anybody put it out on the Net for little old me to use.&amp;nbsp; Off to look.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 11 Mar 2013 14:56:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131380#M35725</guid>
      <dc:creator>HB</dc:creator>
      <dc:date>2013-03-11T14:56:37Z</dc:date>
    </item>
    <item>
      <title>Re: Subsetting data- slowly slimming down a base file</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131381#M35726</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi HB,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I did this process for acquisition campaigns for a year; matched third party several million records with existing customers&amp;nbsp; database; the way I did kind of like Bayesian rules hopefully these help you or any other people who have the same issues;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;e.g. column required: name / address/ postal code;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;1. match all records by postal code level (really depends on database size, if you have more than million records it will take really long time for entire fuzzy matching process),&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;2. most of address read as suite # - street number&amp;nbsp; street name or street number&amp;nbsp; street name suite #; try to scan the first / second word (street number) to reduce the records numbers; (Po Box may run separately)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;3. scan two or three street name words use function compged to fuzzy match; then you will have matched at building level;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;4. if you have name almost do the same thing as step3; I'm pretty sure you may have a really good match rate;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;PS. necessary data cleaning is required before the whole process; e.g six -&amp;gt; 6, highway -&amp;gt; hwy&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hopefully these work for u;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Good luck&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hao&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 11 Mar 2013 18:39:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Subsetting-data-slowly-slimming-down-a-base-file/m-p/131381#M35726</guid>
      <dc:creator>Hao</dc:creator>
      <dc:date>2013-03-11T18:39:28Z</dc:date>
    </item>
  </channel>
</rss>

