<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: HPGENSELECT in Statistical Procedures</title>
    <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658203#M31531</link>
    <description>&lt;P&gt;Steve,&lt;/P&gt;
&lt;P&gt;Thanks again for your feedback. It is greatly appreciated!&lt;/P&gt;
&lt;P&gt;I actually got the code below to run without errors and it looks like it put those with Selected=1 into the training data set and those with&amp;nbsp;Selected=0 into the validation set. I got this in the lst file output:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="HPGenOutput.PNG" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/42899iC881BC646B82BF70/image-size/medium?v=v2&amp;amp;px=400" role="button" title="HPGenOutput.PNG" alt="HPGenOutput.PNG" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;* Create list of independent variables;
proc sql;
     select distinct
           field into: ind_vars separated by ' '
     from model.univariate_&amp;amp;dep_var
;quit;
 
* Randomly select 80% for training data
proc surveyselect data=out.final
                       method=srs
                       seed=43543
                       outall
                       samprate=0.8
                       out=subsets;
run;
 
ods listing;
proc hpgenselect data=subsets lassosteps=50;
     model case (event='1') = &amp;amp;ind_vars / dist = binomial;
     partition rolevar = selected(train = "1" validate ="0");
     selection method = lasso(choose = sbc );
     performance details;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Although there were no errors in the log, the code stopped after&amp;nbsp;Selected Effects: Intercept was entered into the model. Anyone know why that happened?&lt;/P&gt;</description>
    <pubDate>Fri, 12 Jun 2020 20:17:54 GMT</pubDate>
    <dc:creator>BTAinRVA</dc:creator>
    <dc:date>2020-06-12T20:17:54Z</dc:date>
    <item>
      <title>HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657618#M31491</link>
      <description>&lt;P&gt;Hello Everybody!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Does anyone have any examples of using HPGENSELECT to partition a data set into train and test subsets on a binary dependent variable using the LASSO selection method? I haven't found much by googling.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks,&lt;/P&gt;
&lt;P&gt;Brian&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jun 2020 17:31:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657618#M31491</guid>
      <dc:creator>BTAinRVA</dc:creator>
      <dc:date>2020-06-11T17:31:00Z</dc:date>
    </item>
    <item>
      <title>Re: HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657658#M31495</link>
      <description>&lt;P&gt;Maybe I am missing a key point, but I think just adding the statement:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PARTITION fraction(test=0.25 validate=0.25 seed=1);&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;ought to randomly assign records to the training, test and validate subsets, such that half the original data is in the training subset and then half of the remaining data would go to the test subset and the other half to the validate subset.&amp;nbsp; Other proportions are probably better suited for a binary response, but this should serve as a starting point.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;SteveDenham&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jun 2020 19:02:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657658#M31495</guid>
      <dc:creator>SteveDenham</dc:creator>
      <dc:date>2020-06-11T19:02:59Z</dc:date>
    </item>
    <item>
      <title>Re: HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657707#M31498</link>
      <description>&lt;P&gt;Steve,&lt;/P&gt;
&lt;P&gt;Thanks for the reply! I think you are correct with the Partition statement. Have you used HPGENSELECT much? Does the code below seem reasonable and correct? Any guidance greatly appreciated!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;proc surveyselect data=out.final method=srs seed=43543 outall
  samprate=0.8 out=subsets;

proc hpgenselect data=subsets lassosteps=50;
	model &amp;amp;depvar (event='1') =  &amp;amp;VARLIST / dist=binomial;
	partition ROLEVAR=Selected(TRAIN="1" VALIDATE="0");
	selection method=LASSO(SELECT=SBC choose=validate stop=none);
run;&lt;/PRE&gt;</description>
      <pubDate>Thu, 11 Jun 2020 23:43:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/657707#M31498</guid>
      <dc:creator>BTAinRVA</dc:creator>
      <dc:date>2020-06-11T23:43:17Z</dc:date>
    </item>
    <item>
      <title>Re: HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658013#M31503</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/16672"&gt;@BTAinRVA&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;To be honest, all I know about HPGENSELECT is what I have read in the documentation - I've never actually had high-dimensional datasets where I had to do variable selection.&amp;nbsp; Given that caveat, I want to ask about the PARTITION statement you propose.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It looks like there must be a variable called 'Selected' in the data set 'Subsets', which takes on values of 0, 1 and stuff that is not 0 and 1.&amp;nbsp; Those with a 0 go into the test set, those with a 1 go into the validate set, and those with anything else go into the training set, which is what is operated on for variable selection. If Selected happens to only have 0 and 1 values, I don't think the PROC will work, as there would be no observations in the training set.&amp;nbsp; This is how the PARTITION statement works in other regression PROCS.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Good luck on this.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;SteveDenham&lt;/P&gt;</description>
      <pubDate>Fri, 12 Jun 2020 11:16:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658013#M31503</guid>
      <dc:creator>SteveDenham</dc:creator>
      <dc:date>2020-06-12T11:16:05Z</dc:date>
    </item>
    <item>
      <title>Re: HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658203#M31531</link>
      <description>&lt;P&gt;Steve,&lt;/P&gt;
&lt;P&gt;Thanks again for your feedback. It is greatly appreciated!&lt;/P&gt;
&lt;P&gt;I actually got the code below to run without errors and it looks like it put those with Selected=1 into the training data set and those with&amp;nbsp;Selected=0 into the validation set. I got this in the lst file output:&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="HPGenOutput.PNG" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/42899iC881BC646B82BF70/image-size/medium?v=v2&amp;amp;px=400" role="button" title="HPGenOutput.PNG" alt="HPGenOutput.PNG" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;* Create list of independent variables;
proc sql;
     select distinct
           field into: ind_vars separated by ' '
     from model.univariate_&amp;amp;dep_var
;quit;
 
* Randomly select 80% for training data
proc surveyselect data=out.final
                       method=srs
                       seed=43543
                       outall
                       samprate=0.8
                       out=subsets;
run;
 
ods listing;
proc hpgenselect data=subsets lassosteps=50;
     model case (event='1') = &amp;amp;ind_vars / dist = binomial;
     partition rolevar = selected(train = "1" validate ="0");
     selection method = lasso(choose = sbc );
     performance details;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Although there were no errors in the log, the code stopped after&amp;nbsp;Selected Effects: Intercept was entered into the model. Anyone know why that happened?&lt;/P&gt;</description>
      <pubDate>Fri, 12 Jun 2020 20:17:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658203#M31531</guid>
      <dc:creator>BTAinRVA</dc:creator>
      <dc:date>2020-06-12T20:17:54Z</dc:date>
    </item>
    <item>
      <title>Re: HPGENSELECT</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658965#M31545</link>
      <description>&lt;P&gt;Can you share the output?&amp;nbsp; My first thought is that no variables met the LASSO criterion for inclusion under SBC.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;SteveDenham&lt;/P&gt;</description>
      <pubDate>Mon, 15 Jun 2020 12:58:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/HPGENSELECT/m-p/658965#M31545</guid>
      <dc:creator>SteveDenham</dc:creator>
      <dc:date>2020-06-15T12:58:19Z</dc:date>
    </item>
  </channel>
</rss>

