Building models with SAS Enterprise Miner, SAS Factory Miner, SAS Visual Data Mining and Machine Learning or just with programming

10-fold corss validation

Accepted Solution Solved
Reply
Contributor
Posts: 36
Accepted Solution

10-fold corss validation

I have a GLM model :

 

PROC GENMOD DATA = MYDATA;
CLASS COUNTRY JOB;
MODEL MONEY = COUNTRY JOB / DIST = GAMMA LINK = LOG;
RUN;


I want to estimate the prediction error. How can I do 10-fold cross validation on my data with SAS ?

 

 


Accepted Solutions
Solution
‎06-15-2017 04:55 AM
Super User
Posts: 10,044

Re: 10-fold corss validation

There is an example I wrote before. But it is for Logistic Regression.

 



/****** K-Fold CV ****/
%macro k_fold_cv(k=10);
ods select none;

proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


%macro k_fold_cv_rep(r=1,k=10);
ods select none;
%do r=1 %to &r;
proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score_r&r._&i;
 merge true native est;
 retain rep &r id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
data all;
 set k_fold_cv k_fold_cv_rep indsname=indsn;
 length indsname $ 32;
 indsname=indsn;
run;
proc summary data=all nway;
 class indsname;
 var optimism;
 output out=want mean=mean lclm=lclm uclm=uclm;
run;

View solution in original post


All Replies
Trusted Advisor
Posts: 1,932

Re: 10-fold corss validation

As far as I know, there is no way in SAS to cross-validate such a model. You'd have to write your own Cross-Validation code, or find a macro that someone else has written.

Solution
‎06-15-2017 04:55 AM
Super User
Posts: 10,044

Re: 10-fold corss validation

There is an example I wrote before. But it is for Logistic Regression.

 



/****** K-Fold CV ****/
%macro k_fold_cv(k=10);
ods select none;

proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


%macro k_fold_cv_rep(r=1,k=10);
ods select none;
%do r=1 %to &r;
proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score_r&r._&i;
 merge true native est;
 retain rep &r id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
data all;
 set k_fold_cv k_fold_cv_rep indsname=indsn;
 length indsname $ 32;
 indsname=indsn;
run;
proc summary data=all nway;
 class indsname;
 var optimism;
 output out=want mean=mean lclm=lclm uclm=uclm;
run;

☑ This topic is solved.

Need further help from the community? Please ask a new question.

Discussion stats
  • 2 replies
  • 209 views
  • 4 likes
  • 3 in conversation