/*
Correction: I confused the OUTMODEL= option with the CODE statement.
The corrected step is listed here for reference only; the same step is
run further down, after its input dataset has been created.

    proc logistic data=admgpa10e3 descending plots=roc;
      model admit(event='1') = gpa/lackfit rsq;
      code file='c:\temp\logitScore.sas';
    run;

Using the relatively new code file option with several modeling procs

Question: What is the probability Achme University will accept me?

http://www.ats.ucla.edu/stat/data/binary.csv
http://blogs.sas.com/content/iml/2014/02/19/scoring-a-regression-model-in-sas.html

HAVE

You have two datasets, one with 100,000 students containing high school
GPAs and a binary 0/1 admission to Achme University.

The second dataset contains just GPAs for 10,000,000 high school
students for all the universities in the US.

You want to know how many students in the 10,000,000 have at least a
66% chance of being admitted to Achme University.

Note: There are at least two other ways to score the data, outmodel or
adding the 10,000,000 GPAs with a missing admit variable.
The code file is the most flexible?

HAVE

Up to 40 obs WORK.ADMGPA total obs=100,000

  Obs    GPA    ADMIT

    1    3.6      0
    2    3.6      1
    3    4.0      1
    4    3.1      1
    5    2.9      0
    6    3.0      1
    7    2.9      1
    8    3.0      0
    9    3.3      1
   10    3.9      0
  ....

Second dataset

Up to 40 obs from admgpa total obs=10,000,000

  Obs    GPA

    1    3.8
    2    3.1
    3    3.3
    4    3.2
    5    3.3
    6    4.0
    7    3.1
    8    2.9
    9    3.6
   10    2.8

WANT

10% of US high school students have at least 66% chance of admission
to Achme University

  TOTSUBMIT=10,000,000 TOTADMIT=1,000,000 PCTELIGIBLE=10%
*/

* Achme admission dataset - the 50 admit/gpa pairs below stand in for the 100,000 row dataset described above;
* The trailing @@ on INPUT lets several observations be read from each data line;
data admgpa10e3;
  input admit gpa @@;
cards4;
0 3.6 1 3.7 1 4.0 1 3.2 0 2.9 1 3.0 1 3.0 0 3.1 1 3.4 0 3.9
0 4.0 0 3.2 1 4.0 0 3.1 1 4.0 0 3.4 0 3.9 0 2.6 0 3.8 1 3.8
0 3.2 1 3.6 0 2.8 0 3.2 1 3.4 1 3.7 1 3.6 1 3.7 1 3.2 0 3.3
0 3.8 0 3.4 0 3.4 1 4.0 0 3.1 0 3.1 0 3.3 0 2.9 1 3.1 1 2.7
0 2.4 1 3.3 1 3.2 0 3.3 0 2.9 1 3.5 1 3.5 0 3.0 0 2.5 0 3.4
;;;;
run;quit;

* US admission dataset - the GPA values below stand in for the 10,000,000 row dataset described above;
data admgpa10e6;
  format gpa 3.1;
  input gpa @@;
cards4;
3.9 3.1 3.4 3.3 3.3 4.0 3.2 2.9 3.7 2.8
3.2 3.3 3.7 3.9 4.0 3.6 3.6 3.3 3.7 3.7
4.0 2.9 3.4 4.0 3.5 4.0 3.4 4.0 3.1 4.0
2.9 3.1 2.7 2.9 3.6 3.0 3.3 3.5 3.3 4.0
3.8 3.6 3.9 2.9 3.4 3.3 3.5 3.6 2.9 3.3
3.2 3.6 3.3 3.9 4.0 3.0 3.6 3.1 2.9 3.5
3.1 3.4 3.0 3.2 3.8 4.0 3.5 3.7 3.7 2.9
3.7 2.7 2.9 3.0 3.9
3.4 3.5 3.7 3.2 3.2 3.2 2.8 3.4 3.1 3.0
3.6 3.3 4.0 3.4 3.6 3.9 3.5 3.9 3.4 3.4
3.4 3.4 2.7 2.9 3.3 3.7 3.4 3.9 3.5 3.7
2.9 2.5 3.6 3.5 3.8 3.1 3.5 3.6 2.7 3.3
4.0 3.2 3.8 4.0 3.6 3.5 2.8 3.5 3.4 3.5
3.4 2.6 3.2 3.3 3.0 3.8 3.9 4.0 3.8 2.8
3.6 3.6 2.9 3.1 3.4 2.9 3.5 3.8 3.6
;;;;
run;quit;

* Fit admit on gpa and write the fitted model out as DATA step scoring code;
proc logistic data=admgpa10e3 descending plots=roc;
  model admit(event='1') = gpa/lackfit rsq;
  code file='c:\temp\logitScore.sas';
run;

/*
Logistic output

Association of Predicted Probabilities and Observed Responses

  Percent Concordant     64.9    Somers' D    0.354
  Percent Discordant     29.5    Gamma        0.375
  Percent Tied            5.5    Tau-a        0.178
  Pairs                   616    c            0.677
*/

* compute eligible;
data _null_;
  retain totadmit 0;
  set admgpa10e6 end=dne;
  /* Pull the generated scoring statements into this step so every GPA row is scored.
     NOTE(review): P_ADMIT1 is expected to be created by the included file - confirm
     the variable name against the generated logitScore.sas. */
  %include 'c:\temp\logitScore.sas';
  /* WANT asks for "at least 66%", so the boundary value is included. */
  if P_ADMIT1>=.66 then totadmit=totadmit+1;
  /* On the last row _n_ equals the number of rows read, so report the totals once. */
  if dne then do;
    totsubmit=_n_;
    pctselected=totadmit/totsubmit;
    put totsubmit= totadmit= pctselected= percent.;
  end;
run;quit;

/*
BACKGROUND CODE FILE (from logistic)

The listing below is an example of a generated scoring file (it uses
predictors X and DRUG, not the gpa model above). It is DATA step body
code, so it is kept inside this comment instead of being run in open code.

*****************************************;
** SAS Scoring Code for PROC Logistic;
*****************************************;

label P_EVENT = 'Predicted: Event' ;
label P_NONEVENT = 'Predicted: NonEvent' ;

drop _LMR_BAD;
_LMR_BAD=0;

*** Check interval variables for missing values;
if nmiss(X) then do;
   _LMR_BAD=1;
   goto _SKIP_000;
end;

*** Generate design variables for DRUG;
drop _0_0 _0_1 _0_2 _0_3 _0_4 ;
_0_0= 0;
_0_1= 0;
_0_2= 0;
_0_3= 0;
_0_4= 0;
length _st8 $ 8; drop _st8;
_st8 = left(trim(put (DRUG, $8.)));
_dm_find = 0; drop _dm_find;
if _st8 <= 'C' then do;
   if _st8 <= 'B' then do;
      if _st8 = 'A' then do;
         _0_0 = 1;
         _dm_find = 1;
      end;
      else do;
         if _st8 = 'B' then do;
            _0_1 = 1;
            _dm_find = 1;
         end;
      end;
   end;
   else do;
      if _st8 = 'C' then do;
         _0_2 = 1;
         _dm_find = 1;
      end;
   end;
end;
else do;
   if _st8 = 'D' then do;
      _0_3 = 1;
      _dm_find = 1;
   end;
   else do;
      if _st8 = 'E' then do;
         _0_4 = 1;
         _dm_find = 1;
      end;
   end;
end;
if not _dm_find then do;
   _0_0 = .;
   _0_1 = .;
   _0_2 = .;
   _0_3 = .;
   _0_4 = .;
   _LMR_BAD=1;
   goto _SKIP_000;
end;

*** Compute Linear Predictors;
drop _LP0;
_LP0 = 0;
_LP0 = _LP0 + (1.97936434079353) * X;

*** Effect: DRUG;
_TEMP = 1;
_LP0 = _LP0 + (-2.89553671611902) * _TEMP * _0_0;
_LP0 = _LP0 + (-2.01616789170953) * _TEMP * _0_1;
_LP0 = _LP0 + (-3.79486594917064) * _TEMP * _0_2;
_LP0 = _LP0 + (-0.85483460873143) * _TEMP * _0_3;
_LP0 = _LP0 + (0) * _TEMP * _0_4;

*** Predicted values;
drop _P0 _P1;
_TEMP = 0.27925293123804 + _LP0;
if (_TEMP < 0) then do;
   _TEMP = exp(_TEMP);
   _P0 = _TEMP / (1 + _TEMP);
end;
else _P0 = 1 / (1 + exp(-_TEMP));
_P1 = 1.0 - _P0;
P_EVENT = _P0;
P_NONEVENT = _P1;

_SKIP_000:
if _LMR_BAD=1 then do;
   P_EVENT = .;
   P_NONEVENT = .;
end;
drop _TEMP;
*/

* GET ONLINE DATA;
/* NOTE(review): the utl_submit_r macro is not defined in this file; it must be
   available from elsewhere (e.g. an autocall library) for this section to run.
   The R code reads binary.csv and writes columns 1 and 3 to a Stata file. */
%utl_submit_r(%nrbquote(
  library(foreign);
  mydata <- read.csv('http://www.ats.ucla.edu/stat/data/binary.csv');
  head(mydata);
  write.dta(mydata[,c(1,3)],convert.factors='string',version = 10L, 'c:\\temp\\mydata.dta');
));

* Read the Stata file written by the R step into WORK.GPA;
proc import out=gpa file='c:\\temp\\mydata.dta' replace;
run;quit;

options ls=64;

* Write GPA values for rows 51 through 200 to the log, several per line;
data admgpa;
  format gpa 3.1;
  set gpa(firstobs=51 obs=200 keep=gpa);
  put gpa @@;
run;quit;

* Kitchen sink;
/* NOTE(review): this section depends on names not defined in this file: the
   ffp libref, the ffp_potvar macro variable, and the array and do_over macros. */
ods select Logistic.ModelInfo ;
ods select Logistic.NObs ;
ods select Logistic.ResponseProfile ;
ods select Logistic.ConvergenceStatus ;
ods select Logistic.FitStatistics ;
ods select Logistic.GlobalTests ;
ods select Logistic.ParameterEstimates ;
ods select Logistic.OddsRatios ;
ods select Logistic.LackFit.LackFitPartition ;
ods select Logistic.LackFit.LackFitChiSq ;
ods select Logistic.PhatPlots ;
ods select Logistic.LeveragePlots ;
ods select Logistic.DPCPlot ;
ods select Logistic.rocoverlay ;
ods select ROCOverlay ;
ods output ParameterEstimates=ffp_btafin;
proc logistic data=ffp.ffp_chrnumtwo descending plots=roc plots(only label)=(phat leverage dpc roc);
  model allrsp(event='1')=&ffp_potvar/lackfit rsq;
  %array(ffp_roc,values=&ffp_potvar);
  %do_over(ffp_roc,phrase=roc "?" ?;)
  /* resdev= requests the deviance residual; the original keyword here was garbled. */
  output out=ffp_scrbst p=score resdev=ffp_resdev h=ffp_pii reschi=ffp_reschi difchisq=ffp_difchi;
run;quit;

/* ... View more */