Bookmark | Subscribe | RSS Feed

[Logistic] Surveyselect 프로시져를 활용한 Bagging Logistic 모형 구축

Started ‎06-15-2020 by
Modified ‎06-15-2020 by
Views 106

/*---------------------------------------------------------------------
  References

    - Sampling and sample management manual
      (Korea Institute for Health and Social Affairs)

    - Model validation techniques
        http://www.listendata.com/2015/01/model-validation-in-logistic-regression.html

    - Don't Be Loopy: Re-Sampling and Simulation the SAS(R) Way
        http://www2.sas.com/proceedings/forum2007/183-2007.pdf

    - Lab Eight: implementing the bootstrap and 10-fold CV in SAS
        https://web.stanford.edu/~kcobb/hrp261/lab8.doc

    - Example data
        https://web.stanford.edu/~kcobb/hrp261/kyphosis.sas7bdat

  NOTE: these references are deliberately kept in a block comment.
  A "* ... ;" comment statement must not contain an unmatched quotation
  mark, and the apostrophe in "Don't" would open a string literal that
  swallows the LIBNAME statement below.
---------------------------------------------------------------------*/

/* Library that holds the kyphosis example data set. */
LIBNAME lab8 'C:\work\SAS\기타';

/* Number of replicates used by the resampling steps that reference &nsamples. */
%LET nsamples=20;

 

/* Copy the source data into WORK, sorted by the target so that it can be
   used as the STRATA variable of PROC SURVEYSELECT below. */
PROC SORT DATA=lab8.kyphosis OUT=kyphosis;
     BY Kyphosis;
RUN;

/*
  Draw &nsamples bootstrap replicates into BOOT.

    METHOD=URS : unrestricted random sampling, i.e. WITH replacement
                 (METHOD=SRS would sample without replacement).
    STRATA     : resample inside each target class so that every replicate
                 keeps the event rate of the source data.
    SAMPRATE=1 : each stratum is resampled to its own size.  A fixed N=83
                 would be applied to EACH stratum separately, giving 83+83
                 draws per replicate and forcing a 50% event rate.
    OUTHITS    : not specified, so BOOT holds one row per distinct selected
                 unit and the number of times it was drawn is in NumberHits.
*/
ods select none;   /* suppress the SURVEYSELECT summary output */
proc surveyselect data=kyphosis method=urs samprate=1 rep=&nsamples. out=boot SEED=1234;
     strata Kyphosis;
run;
ods select all;

 

/* Event rate (share of Kyphosis=1) in the source data. */
PROC SQL;
  SELECT SUM(CASE WHEN Kyphosis=1 THEN 1 ELSE 0 END) / COUNT(*) AS RATIO
  FROM   lab8.kyphosis;
QUIT;

/*
  Event rate per bootstrap replicate.

  BOOT stores one row per distinct selected unit with its draw count in
  NumberHits, so the rate has to be weighted by NumberHits.  Counting rows
  would only measure the share of distinct units, not the composition of
  the with-replacement sample that the model is fitted on.
*/
PROC SQL;
  SELECT Replicate,
         SUM(CASE WHEN Kyphosis=1 THEN NumberHits ELSE 0 END) / SUM(NumberHits) AS RATIO
  FROM   boot
  GROUP BY Replicate;
QUIT;

 

/* Order the bootstrap sample by replicate so it can be processed BY replicate. */
PROC SORT DATA=boot OUT=boot;
    BY Replicate;
RUN;

/*
  Fit one logistic regression per replicate.

    FREQ numberhits : each row counts as many times as it was drawn.
    OUTEST=est      : data set receiving the parameter estimates.
    OUTMODEL=       : data set receiving the fitted models, reused later
                      through INMODEL= for scoring.
    NOPRINT         : no displayed output.
*/
PROC LOGISTIC DATA=boot NOPRINT
              OUTEST=est
              OUTMODEL=LOGIT_MODEL;
    BY    replicate;
    FREQ  numberhits;
    MODEL kyphosis (event="1") = age number start;
RUN;

 

 

/* Score the test data */

/*
  Option 1: PROC SCORE (scores for the TEST data).

  The coefficients in EST are applied to the kyphosis data BY replicate.
  The original target is renamed to Kyphosis_1 because the DATA step below
  reads the computed score from a variable called Kyphosis.
*/
PROC SCORE DATA=kyphosis(rename=(Kyphosis=Kyphosis_1))
           SCORE=est
           TYPE=parms
           OUT=SCORE_OUT;
    BY  replicate;
    VAR age number start;
RUN;

/* Turn the score into a probability with the inverse logit. */
DATA RESULT_1;
    SET SCORE_OUT;
    DROP _odds;                      /* helper only, not kept in RESULT_1 */
    _odds = exp(Kyphosis);
    prob  = _odds / (1 + _odds);
RUN;

 

/*
  Option 2: PROC LOGISTIC with INMODEL= (scores for the TEST data).

  Scores the kyphosis data with the models saved in LOGIT_MODEL, BY
  replicate, and writes the result to RESULT_2.  The target is renamed to
  Kyphosis_1 in the scored data, as in the PROC SCORE variant.
*/
PROC LOGISTIC INMODEL=LOGIT_MODEL;
    BY    replicate;
    SCORE OUT=RESULT_2
          DATA=kyphosis(rename=(Kyphosis=Kyphosis_1));
RUN;

 

/*
  Average the per-replicate probabilities of every TEST record.

  RESULT_1 is collapsed to one row per ID holding the mean of PROB.
  NOTE(review): the column name AVG_SOCRE looks like a typo of AVG_SCORE;
  it is left unchanged here because other code may reference it.
*/
proc sql;
    create table RESULT_PREV as
    select   ID,
             avg(PROB) as AVG_SOCRE
    from     RESULT_1
    group by ID;
quit;

 

/*********************************************/
/* CROSS-VALIDATION                          */
/*********************************************/

/*
  K    : number of replicates / folds.
  rate : training fraction (K-1)/K.  %SYSEVALF is required because %EVAL
         only does integer arithmetic.  An invisible U+200E character that
         had been pasted into the function name is removed here; with it the
         macro function was not recognised.
*/
%let K=10;
%let rate=%sysevalf((&K-1)/&K);
%PUT &RATE;

 

/*
  Generate the cross-validation samples.

    METHOD=SRS : the training part is drawn WITHOUT replacement, so each
                 replicate is a genuine split into SAMPRATE training records
                 and (1 - SAMPRATE) hold-out records.  Sampling with
                 replacement (URS) would make this a bootstrap instead: the
                 training part would no longer cover the requested fraction
                 of distinct records.
    OUTALL     : keep every record in XV and flag the sampled (training)
                 ones with Selected=1.
    REP=&K     : K independent random splits.
*/
proc surveyselect data=kyphosis out=xv seed=495857
                  samprate=&RATE outall rep=&K method=srs ;
run;

/* Copy the target only for training records; hold-out records keep a
   missing new_Kyphosis and are therefore left out of the model fit. */
data xv;
 set xv;
     if selected then new_Kyphosis=Kyphosis;
run;

/*
  Get predicted values for the missing new_Kyphosis in each replicate.

  No FREQ statement: with SRS every training record appears exactly once
  (and SRS does not create NumberHits).  OUT1 keeps only the hold-out rows
  together with their predicted probability YHAT.
*/
proc logistic data=xv outest=est outmodel=LOGIT_MODEL noprint;
     by    replicate;
     model new_Kyphosis (event="1")= age number start;
     output out=out1(where=(new_Kyphosis=.)) p=yhat;
run;

 

/*********************************************/
/* Leave-one-out cross-validation (LOOCV)    */
/*********************************************/

/*
  Build one replicate per record of kyphosis.  In replicate i every record
  is copied, but the target of record i is set to missing so that record i
  is the single hold-out of that replicate.

    NOBS=numrecs : number of records in kyphosis.
    POINT=rec    : direct access by record number; STOP is required because
                   a POINT= read never hits end-of-file on its own.
*/
data xv;
     do replicate = 1 to numrecs;
        do rec = 1 to numrecs;
           set kyphosis nobs=numrecs point=rec;
           if replicate ^= rec then new_Kyphosis=Kyphosis; else new_Kyphosis=.;
           OUTPUT;
        end;
     end;
     STOP;
run;

/*
  Fit one model per replicate and keep only the held-out row of each
  replicate (missing new_Kyphosis) with its predicted probability YHAT.
  The OUTEST= option had been specified twice; the duplicate is removed.
*/
proc logistic data=xv outest=est outmodel=LOGIT_MODEL noprint;
     by    replicate;
     model new_Kyphosis (event="1")= age number start;
     output out=out1(where=(new_Kyphosis=.)) p=yhat;
run;

 

 

 

 

/*********************************************/
/* Reference                                 */
/*********************************************/

PROC SORT DATA=kyphosis;
     BY ID;
RUN;

/*
  Create &nsamples stacked copies of the test data.

  Sampling WITHOUT replacement (METHOD=SRS) at SAMPRATE=1 selects every
  record once per replicate, which is the same logic as the DATA step
  below.  The replicate count now uses &nsamples in both places (it was a
  hard-coded 10 here versus &nsamples below), and SAMPRATE=1 replaces the
  hard-coded record count N=83.
*/
proc surveyselect data=kyphosis method=SRS samprate=1 rep=&nsamples out=TEST_DATA SEED=1234 NOPRINT;
run;

/* Same result with a DATA step: read every record once per replicate.
   POINT= access never reaches end-of-file, hence the explicit STOP. */
data TEST_DATA_1;
     do replicate = 1 to &nsamples;
        do rec = 1 to numrecs;
           set kyphosis nobs=numrecs point=rec;
           OUTPUT;
        end;
     end;
     STOP;
run;

Version history
Last update:
‎06-15-2020 01:29 AM
Updated by:
Contributors

SAS Innovate 2025: Register Now

Registration is now open for SAS Innovate 2025, our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!

Article Labels
Article Tags