Bookmark | Subscribe | RSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
John4
Obsidian | Level 7

I have a GLM model:

 

/* Gamma GLM with a log link: MONEY modelled on the two CLASS predictors. */
proc genmod data=mydata;
   class country job;
   model money = country job / dist=gamma link=log;
run;


I want to estimate the prediction error. How can I do 10-fold cross-validation on my data with SAS?

 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User

Here is an example I wrote before, but it is for logistic regression.

 



/****** K-Fold CV ****/
/*
 * k_fold_cv: a single pass of k-fold cross-validation of a logistic model
 * fitted to sashelp.heart.
 *
 * Parameters
 *   k    - number of folds requested from PROC SURVEYSELECT (default 10).
 *   seed - seed handed to PROC SURVEYSELECT (default 0). A positive value
 *          makes the fold assignment reproducible; 0 is intended to keep
 *          the original clock-based behaviour (see the SURVEYSELECT SEED=
 *          documentation).
 *
 * Result
 *   WORK.K_FOLD_CV - score1..score&k stacked. Each score&i holds, for fold i,
 *   the training-fit c statistic (native), the AUC obtained by scoring the
 *   held-out fold (true), and optimism = native - true.
 */
%macro k_fold_cv(k=10, seed=0);
/* Keep the loop index private so a caller's own &i is not overwritten. */
%local i;
ods select none;

/* Assign every observation to one of &k groups (variable groupid). */
proc surveyselect data=sashelp.heart group=&k seed=&seed out=have;
run;

%do i=1 %to &k ;
/* Fold &i is held out for testing; all other folds form the training set. */
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

/* Capture the training c statistic and the test-set AUC as data sets. */
ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

/* One-to-one merge of the fold's results, tagged with the fold number. */
data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


/*
 * k_fold_cv_rep: repeated k-fold cross-validation of a logistic model
 * fitted to sashelp.heart.
 *
 * Parameters
 *   r    - number of repetitions (default 1).
 *   k    - number of folds per repetition (default 10).
 *   seed - base seed for PROC SURVEYSELECT (default 0). When positive,
 *          repetition n uses seed + n - 1, so each repetition gets a
 *          different but reproducible split; 0 is intended to keep the
 *          original clock-based behaviour (see the SURVEYSELECT SEED=
 *          documentation).
 *
 * Result
 *   WORK.K_FOLD_CV_REP - every score_r<rep>_<fold> data set stacked. Each
 *   holds the training-fit c statistic (native), the AUC obtained by scoring
 *   the held-out fold (true), and optimism = native - true.
 */
%macro k_fold_cv_rep(r=1,k=10,seed=0);
/* Private loop counters: the original reused parameter &r as the loop index
   and left &i undeclared, which could overwrite a caller's variables. */
%local i rep rep_seed;
ods select none;

/* The final SET uses the score_r: name prefix, so data sets left over from an
   earlier call with a larger r or k would leak into the result. Clear them. */
proc datasets nolist nowarn memtype=data;
 delete score_r: ;
quit;

%do rep=1 %to &r;
%if &seed > 0 %then %let rep_seed=%eval(&seed + &rep - 1);
%else %let rep_seed=0;

/* Assign every observation to one of &k groups (variable groupid). */
proc surveyselect data=sashelp.heart group=&k seed=&rep_seed out=have;
run;

%do i=1 %to &k ;
/* Fold &i is held out for testing; all other folds form the training set. */
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

/* Capture the training c statistic and the test-set AUC as data sets. */
ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

/* One-to-one merge of the fold's results, tagged with repetition and fold. */
data score_r&rep._&i;
 merge true native est;
 retain rep &rep id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
/* Stack both result sets and record, per row, the data set it was read from. */
data all;
   set k_fold_cv
       k_fold_cv_rep
       indsname=indsn;
   length indsname $ 32;
   indsname = indsn;
run;

/* Mean optimism with lower/upper confidence limits, one row per source data set. */
proc summary data=all nway;
   class indsname;
   var optimism;
   output out=want
          mean=mean
          lclm=lclm
          uclm=uclm;
run;

View solution in original post

2 REPLIES 2
PaigeMiller
Diamond | Level 26

As far as I know, there is no way in SAS to cross-validate such a model. You'd have to write your own Cross-Validation code, or find a macro that someone else has written.

--
Paige Miller
Ksharp
Super User

Here is an example I wrote before, but it is for logistic regression.

 



/****** K-Fold CV ****/
/*
 * k_fold_cv: a single pass of k-fold cross-validation of a logistic model
 * fitted to sashelp.heart.
 *
 * Parameters
 *   k    - number of folds requested from PROC SURVEYSELECT (default 10).
 *   seed - seed handed to PROC SURVEYSELECT (default 0). A positive value
 *          makes the fold assignment reproducible; 0 is intended to keep
 *          the original clock-based behaviour (see the SURVEYSELECT SEED=
 *          documentation).
 *
 * Result
 *   WORK.K_FOLD_CV - score1..score&k stacked. Each score&i holds, for fold i,
 *   the training-fit c statistic (native), the AUC obtained by scoring the
 *   held-out fold (true), and optimism = native - true.
 */
%macro k_fold_cv(k=10, seed=0);
/* Keep the loop index private so a caller's own &i is not overwritten. */
%local i;
ods select none;

/* Assign every observation to one of &k groups (variable groupid). */
proc surveyselect data=sashelp.heart group=&k seed=&seed out=have;
run;

%do i=1 %to &k ;
/* Fold &i is held out for testing; all other folds form the training set. */
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

/* Capture the training c statistic and the test-set AUC as data sets. */
ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

/* One-to-one merge of the fold's results, tagged with the fold number. */
data score&i;
 merge true native est;
 retain id &i ;
 optimism=native-true;
run;
%end;
data k_fold_cv;
 set score1-score&k;
run;

ods select all;
%mend;

%k_fold_cv(k=10)








/*************************************/


/*
 * k_fold_cv_rep: repeated k-fold cross-validation of a logistic model
 * fitted to sashelp.heart.
 *
 * Parameters
 *   r    - number of repetitions (default 1).
 *   k    - number of folds per repetition (default 10).
 *   seed - base seed for PROC SURVEYSELECT (default 0). When positive,
 *          repetition n uses seed + n - 1, so each repetition gets a
 *          different but reproducible split; 0 is intended to keep the
 *          original clock-based behaviour (see the SURVEYSELECT SEED=
 *          documentation).
 *
 * Result
 *   WORK.K_FOLD_CV_REP - every score_r<rep>_<fold> data set stacked. Each
 *   holds the training-fit c statistic (native), the AUC obtained by scoring
 *   the held-out fold (true), and optimism = native - true.
 */
%macro k_fold_cv_rep(r=1,k=10,seed=0);
/* Private loop counters: the original reused parameter &r as the loop index
   and left &i undeclared, which could overwrite a caller's variables. */
%local i rep rep_seed;
ods select none;

/* The final SET uses the score_r: name prefix, so data sets left over from an
   earlier call with a larger r or k would leak into the result. Clear them. */
proc datasets nolist nowarn memtype=data;
 delete score_r: ;
quit;

%do rep=1 %to &r;
%if &seed > 0 %then %let rep_seed=%eval(&seed + &rep - 1);
%else %let rep_seed=0;

/* Assign every observation to one of &k groups (variable groupid). */
proc surveyselect data=sashelp.heart group=&k seed=&rep_seed out=have;
run;

%do i=1 %to &k ;
/* Fold &i is held out for testing; all other folds form the training set. */
data training;
 set have(where=(groupid ne &i)) ;
run;
data test;
 set have(where=(groupid eq &i));
run;

/* Capture the training c statistic and the test-set AUC as data sets. */
ods output 
Association=native(keep=label2 nvalue2 rename=(nvalue2=native) where=(label2='c'))
ScoreFitStat=true(keep=dataset freq auc rename=(auc=true));
proc logistic data=training
 outest=est(keep=_status_ _name_) ;
 class sex;
 model status(event='Alive')=sex height weight;
 score data=test fitstat; 
run;

/* One-to-one merge of the fold's results, tagged with repetition and fold. */
data score_r&rep._&i;
 merge true native est;
 retain rep &rep id &i;
 optimism=native-true;
run;
%end;
%end;
data k_fold_cv_rep;
 set score_r:;
run;

ods select all;
%mend;

%k_fold_cv_rep(r=20,k=10);


/********************/
/* Stack both result sets and record, per row, the data set it was read from. */
data all;
   set k_fold_cv
       k_fold_cv_rep
       indsname=indsn;
   length indsname $ 32;
   indsname = indsn;
run;

/* Mean optimism with lower/upper confidence limits, one row per source data set. */
proc summary data=all nway;
   class indsname;
   var optimism;
   output out=want
          mean=mean
          lclm=lclm
          uclm=uclm;
run;

sas-innovate-2024.png

Join us for SAS Innovate April 16-19 at the Aria in Las Vegas. Bring the team and save big with our group pricing for a limited time only.

Pre-conference courses and tutorials are filling up fast and are always a sellout. Register today to reserve your seat.

 

Register now!

How to choose a machine learning algorithm

Use this tutorial as a handy guide to weigh the pros and cons of these commonly used machine learning algorithms.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 2 replies
  • 5470 views
  • 4 likes
  • 3 in conversation