BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
John4
Obsidian | Level 7

I have a GLM model :

 

PROC GENMOD DATA = MYDATA;
CLASS COUNTRY JOB;
MODEL MONEY = COUNTRY JOB / DIST = GAMMA LINK = LOG;
RUN;


I want to estimate the prediction error. How can I do 10-fold cross validation on my data with SAS ?

 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

There is an example I wrote before. But it is for Logistic Regression.

 



/****** K-Fold CV ****/
%macro k_fold_cv(k=10);
ods select none;

proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


%macro k_fold_cv_rep(r=1,k=10);
ods select none;
%do r=1 %to &r;
proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score_r&r._&i;
 merge true native est;
 retain rep &r id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
data all;
 set k_fold_cv k_fold_cv_rep indsname=indsn;
 length indsname $ 32;
 indsname=indsn;
run;
proc summary data=all nway;
 class indsname;
 var optimism;
 output out=want mean=mean lclm=lclm uclm=uclm;
run;

View solution in original post

2 REPLIES 2
PaigeMiller
Diamond | Level 26

As far as I know, there is no way in SAS to cross-validate such a model. You'd have to write your own Cross-Validation code, or find a macro that someone else has written.

--
Paige Miller
Ksharp
Super User

There is an example I wrote before. But it is for Logistic Regression.

 



/****** K-Fold CV ****/
%macro k_fold_cv(k=10);
ods select none;

proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


%macro k_fold_cv_rep(r=1,k=10);
ods select none;
%do r=1 %to &r;
proc surveyselect data=sashelp.heart group=&k out=have;
run;

%do i=1 %to &k ;
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

data score_r&r._&i;
 merge true native est;
 retain rep &r id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
data all;
 set k_fold_cv k_fold_cv_rep indsname=indsn;
 length indsname $ 32;
 indsname=indsn;
run;
proc summary data=all nway;
 class indsname;
 var optimism;
 output out=want mean=mean lclm=lclm uclm=uclm;
run;

hackathon24-white-horiz.png

The 2025 SAS Hackathon Kicks Off on June 11!

Watch the live Hackathon Kickoff to get all the essential information about the SAS Hackathon—including how to join, how to participate, and expert tips for success.

YouTube LinkedIn

How to choose a machine learning algorithm

Use this tutorial as a handy guide to weigh the pros and cons of these commonly used machine learning algorithms.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 2 replies
  • 6380 views
  • 4 likes
  • 3 in conversation