<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Segmentation of a huge datamart in SAS Enterprise Guide</title>
    <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97341#M8793</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I got another issue : &lt;/P&gt;&lt;P&gt;I do not have a dependent variable. It's just a list of 500 variables.&lt;/P&gt;&lt;P&gt;Any ideas on how to do the selection?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Fri, 05 Apr 2013 09:19:25 GMT</pubDate>
    <dc:creator>M_A_C</dc:creator>
    <dc:date>2013-04-05T09:19:25Z</dc:date>
    <item>
      <title>Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97338#M8790</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is the deal :&lt;/P&gt;&lt;P&gt;I have 15 million lines and 500 variables, which makes for a huge dataset.&lt;/P&gt;&lt;P&gt;I want to make a behavioral segmentation.&lt;/P&gt;&lt;P&gt;First, I have to choose the variables that are most significant, so as to keep just the essential elements, and then proceed with k-means for the segmentation.&lt;/P&gt;&lt;P&gt;How can I choose the significant variables?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 04 Apr 2013 12:09:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97338#M8790</guid>
      <dc:creator>M_A_C</dc:creator>
      <dc:date>2013-04-04T12:09:22Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97339#M8791</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;A discriminant analysis&amp;nbsp; on a random sample will be useful to keep relevant variables; start by using PROC STEPDISC.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 04 Apr 2013 13:37:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97339#M8791</guid>
      <dc:creator>HE</dc:creator>
      <dc:date>2013-04-04T13:37:12Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97340#M8792</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thank you, i'm testing it, i'll get back to you if i have any further questions &lt;img id="smileywink" class="emoticon emoticon-smileywink" src="https://communities.sas.com/i/smilies/16x16_smiley-wink.png" alt="Smiley Wink" title="Smiley Wink" /&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 04 Apr 2013 15:02:29 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97340#M8792</guid>
      <dc:creator>M_A_C</dc:creator>
      <dc:date>2013-04-04T15:02:29Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97341#M8793</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I got another issue : &lt;/P&gt;&lt;P&gt;I do not have a dependent variable. It's just a list of 500 variables.&lt;/P&gt;&lt;P&gt;Any ideas on how to do the selection?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 05 Apr 2013 09:19:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97341#M8793</guid>
      <dc:creator>M_A_C</dc:creator>
      <dc:date>2013-04-05T09:19:25Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97342#M8794</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;No code, but some ideas.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Subset the huge dataset.&amp;nbsp; A 1% random sample would probably do.&lt;/LI&gt;&lt;LI&gt;Use PROC VARCLUS to see how the 500 variables cluster.&lt;/LI&gt;&lt;LI&gt;Identify key variables from a business rule perspective within each variable cluster.&lt;/LI&gt;&lt;LI&gt;Use those variables in PROC FASTCLUS on the full dataset to get your k-means clustering.&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you have access to Enterprise Miner, then a lot of other techniques become available, most of which have the word "tree" in their name.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Steve Denham&lt;BR /&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 05 Apr 2013 12:21:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97342#M8794</guid>
      <dc:creator>SteveDenham</dc:creator>
      <dc:date>2013-04-05T12:21:58Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97343#M8795</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thank you very much, I'll get on it.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Fri, 05 Apr 2013 14:53:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97343#M8795</guid>
      <dc:creator>M_A_C</dc:creator>
      <dc:date>2013-04-05T14:53:30Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97344#M8796</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hope you have sorted your problem with methods described above.&lt;/P&gt;&lt;P&gt;Just wondering what types of variables you have and did you also try factor analysis and MODECLUS?&lt;/P&gt;&lt;P&gt;I had same problem with no. of significant variables, so curious to know which technique was most useful.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 08 Apr 2013 10:45:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97344#M8796</guid>
      <dc:creator>Varsha</dc:creator>
      <dc:date>2013-04-08T10:45:40Z</dc:date>
    </item>
    <item>
      <title>Re: Segmentation of a huge datamart</title>
      <link>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97345#M8797</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Varsha,&lt;/P&gt;&lt;P&gt;I am going to use SteveDenham's idea; it's very logical and it seems that it would work.&lt;/P&gt;&lt;P&gt;I am still on some other tasks that take memory as well. I tried it on another laptop and it works just fine.&lt;/P&gt;&lt;P&gt;Proc varclus to see how the variables cluster; then, from a business perspective, I chose the ones I judged important from each cluster and some others, and then I added other ones even though they didn't show much in the clustering, because they are necessary for this exercise.&lt;/P&gt;&lt;P&gt;Hope I won't run into any trouble; in that case I'll be back to bother you guys.&lt;/P&gt;&lt;P&gt;Good day to ye!&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 09 Apr 2013 12:10:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Enterprise-Guide/Segmentation-of-a-huge-datamart/m-p/97345#M8797</guid>
      <dc:creator>M_A_C</dc:creator>
      <dc:date>2013-04-09T12:10:35Z</dc:date>
    </item>
  </channel>
</rss>

