<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Split sample and 3 fold cross validation logistic regression in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350040#M273592</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;I am not an advanced SAS user and I will need your help. I am working on a dataset with 6 predictors (3 continuous, 3 categorical) and a binary outcome. I am trying to do the following:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1. Split the data into 50% training and 50% validating datasets and then compare their ROC curves, SAS code:&lt;/P&gt;
&lt;P&gt;proc glmselect data=roc;&lt;BR /&gt;&amp;nbsp;&amp;nbsp; partition fraction(validate=0.5);&lt;BR /&gt;&amp;nbsp;&amp;nbsp; model ca=wt cons age race smoking BP / selection=forward;&lt;BR /&gt;&amp;nbsp;&amp;nbsp; output out=outDataForward;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;proc glmselect data=outDataForward;&lt;BR /&gt;&amp;nbsp;model comp=wgtdx consult agedx race smk hyp/selection=backward;&lt;BR /&gt;&amp;nbsp;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2. Conduct a 3-fold split of the sample and then compare the ROC curves and AUC values for the 3 datasets (training, validating, testing), SAS code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc glmselect data=roc;&lt;BR /&gt;&amp;nbsp;partition fraction(test=0.25 validate=0.25);&lt;BR /&gt;&amp;nbsp;model comp=wgtdx consult agedx race smk hyp/selection=forward;&lt;BR /&gt;&amp;nbsp;output out=outDataForward;&lt;BR /&gt;&amp;nbsp;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am sure my code is not correct/complete, and I would appreciate it if someone could explain to me (step by step, with details) how to split my dataset into two equal parts and into 3 parts, and then compare these models and obtain ROC and AUC values.&lt;/P&gt;
&lt;P&gt;Thanks in advance.&lt;/P&gt;
&lt;P&gt;SU&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 14 Apr 2017 13:51:16 GMT</pubDate>
    <dc:creator>sas_user4</dc:creator>
    <dc:date>2017-04-14T13:51:16Z</dc:date>
    <item>
      <title>Split sample and 3 fold cross validation logistic regression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350040#M273592</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;I am not an advanced SAS user and I will need your help. I am working on a dataset with 6 predictors (3 continuous, 3 categorical) and a binary outcome. I am trying to do the following:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1. Split the data into 50% training and 50% validating datasets and then compare their ROC curves, SAS code:&lt;/P&gt;
&lt;P&gt;proc glmselect data=roc;&lt;BR /&gt;&amp;nbsp;&amp;nbsp; partition fraction(validate=0.5);&lt;BR /&gt;&amp;nbsp;&amp;nbsp; model ca=wt cons age race smoking BP / selection=forward;&lt;BR /&gt;&amp;nbsp;&amp;nbsp; output out=outDataForward;&lt;BR /&gt;run;&lt;/P&gt;
&lt;P&gt;proc glmselect data=outDataForward;&lt;BR /&gt;&amp;nbsp;model comp=wgtdx consult agedx race smk hyp/selection=backward;&lt;BR /&gt;&amp;nbsp;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;2. Conduct a 3-fold split of the sample and then compare the ROC curves and AUC values for the 3 datasets (training, validating, testing), SAS code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;proc glmselect data=roc;&lt;BR /&gt;&amp;nbsp;partition fraction(test=0.25 validate=0.25);&lt;BR /&gt;&amp;nbsp;model comp=wgtdx consult agedx race smk hyp/selection=forward;&lt;BR /&gt;&amp;nbsp;output out=outDataForward;&lt;BR /&gt;&amp;nbsp;run;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am sure my code is not correct/complete, and I would appreciate it if someone could explain to me (step by step, with details) how to split my dataset into two equal parts and into 3 parts, and then compare these models and obtain ROC and AUC values.&lt;/P&gt;
&lt;P&gt;Thanks in advance.&lt;/P&gt;
&lt;P&gt;SU&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Apr 2017 13:51:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350040#M273592</guid>
      <dc:creator>sas_user4</dc:creator>
      <dc:date>2017-04-14T13:51:16Z</dc:date>
    </item>
    <item>
      <title>Re: Split sample and 3 fold cross validation logistic regression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350042#M273593</link>
      <description>&lt;P&gt;Please explain to me the code espcially if it is a MACRO so I can apply it to my dataset.&lt;/P&gt;
&lt;P&gt;Thank you so much!&lt;/P&gt;</description>
      <pubDate>Fri, 14 Apr 2017 13:53:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350042#M273593</guid>
      <dc:creator>sas_user4</dc:creator>
      <dc:date>2017-04-14T13:53:49Z</dc:date>
    </item>
    <item>
      <title>Re: Split sample and 3 fold cross validation logistic regression</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350225#M273594</link>
      <description>&lt;P&gt;PROC GLMSELECT can't be applied to logistic regression(binomial distribution),&lt;/P&gt;
&lt;P&gt;Try PROC HPGENSELECT.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The following runs 10-fold cross validation, repeated 20 times, for PROC LOGISTIC. You can start with it.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;

/*
 * k_fold_cv_rep: repeated k-fold cross validation of a PROC LOGISTIC model
 * on sashelp.heart.
 *
 * Parameters:
 *   r - number of repetitions of the whole k-way split (default 1)
 *   k - number of groups requested from PROC SURVEYSELECT (default 10)
 *
 * For every repetition and every group, the model is fitted on all other
 * groups and scored on the held-out group. One small data set per fold is
 * written (score_r[rep]_[fold]) and all of them are stacked into
 * k_fold_cv_rep at the end.
 */
%macro k_fold_cv_rep(r=1,k=10);
/* Turn off displayed ODS output while looping; restored by ods select all below. */
ods select none;
/* Outer loop over repetitions. The loop index reuses the parameter name r,
   so r is overwritten as the loop runs. */
%do r=1 %to &amp;amp;r;
/* Split sashelp.heart into k groups, written to data set have.
   The steps below filter on a variable named groupid - presumably created
   by the group= option; confirm against the SURVEYSELECT documentation. */
proc surveyselect data=sashelp.heart group=&amp;amp;k out=have;
run;

/* Inner loop: each group in turn is the held-out fold. */
%do i=1 %to &amp;amp;k ;
/* Training data: every group except the current one. */
data training;
 set have(where=(groupid ne &amp;amp;i)) ;
run;
/* Test data: only the current group. */
data test;
 set have(where=(groupid eq &amp;amp;i));
run;

/* Capture two ODS tables from the PROC LOGISTIC step that follows:
   - Association: only the row with label2 equal to c is kept, and nvalue2
     is renamed to native (the statistic reported for the training fit);
   - ScoreFitStat: auc is renamed to true (the AUC reported for the scored
     test data).
   This statement must stay directly in front of the procedure it applies to. */
ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
/* Fit on the training data, then score the held-out test data;
   fitstat requests the fit statistics table captured above. */
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

/* One data set per repetition and fold. The merge has no BY statement,
   so the three inputs are combined observation by observation.
   rep and id record the repetition number and the fold number;
   optimism is the training value minus the test value. */
data score_r&amp;amp;r._&amp;amp;i;
 merge true native est;
 retain rep &amp;amp;r id &amp;amp;i;
 optimism=native-true;
run;
%end;
%end;
/* Stack every data set whose name starts with score_r into one result. */
data k_fold_cv_rep;
 set score_r:;
run;

/* Re-enable displayed ODS output. */
ods select all;
%mend;

/* Run the macro with 20 repetitions of a 10-group split. */
%k_fold_cv_rep(r=20,k=10);


/********************/
/* Combine result sets and tag each row with the name of the data set it
   came from (indsname= exposes the source name in the temporary variable
   indsn, which is copied into the kept variable indsname).
   NOTE(review): k_fold_cv is not created anywhere in this snippet -
   presumably it comes from a separate, non-repeated macro that was not
   posted; confirm it exists, or remove it from the set statement,
   before running. */
data all;
 set k_fold_cv k_fold_cv_rep indsname=indsn;
 length indsname $ 32;
 indsname=indsn;
run;
/* Summarize optimism per source data set: mean plus lower and upper
   confidence limits of the mean, written to data set want. */
proc summary data=all nway;
 class indsname;
 var optimism;
 output out=want mean=mean lclm=lclm uclm=uclm;
run;


&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 15 Apr 2017 03:18:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Split-sample-and-3-fold-cross-validation-logistic-regression/m-p/350225#M273594</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-04-15T03:18:27Z</dc:date>
    </item>
  </channel>
</rss>

