Would anyone be able to assist me with the following test results? I was attempting to split my data into a 70/30 training/test data set, but when I run the results, the only set that comes up for me is the "70%" training results even though I can see that it removed 200+ observations for the test data set. How do I get SAS to produce the remaining data set of results for me? I was told it would do so automatically, but all that comes up is the first set. Thanks so much

/*---------------------------------------------------------------------------
  Credit-approval logistic model with a 70/30 train/test split.

  Why only the training results used to appear:
    - The models are fit on Y_train, which is missing for the test rows, so
      PROC LOGISTIC reports fit statistics for the training rows only.
    - The OUTPUT data sets still carry yhat for the test rows (missing
      response, non-missing predictors), but the scoring step was run only
      once with WHERE train=1.
  The scoring / lift-chart pipeline is now a macro that is invoked once for
  train=1 (in-sample) and once for train=0 (out-of-sample).
---------------------------------------------------------------------------*/

title;

proc contents data=mydata.credit_approval;
run;

proc print data=mydata.credit_approval;
run;

data temp;
    set mydata.credit_approval;

    * Training/testing observations: roughly 70% train, 30% test;
    u = uniform(123);
    if (u < 0.7) then train = 1;
    else train = 0;

    * Numeric response: 1 for an A16 value of plus, 0 for minus, missing otherwise;
    if A16 = '+' then Y = 1;
    else if A16 = '-' then Y = 0;
    else Y = .;

    * Response used for fitting: missing on test rows so they are not used in estimation;
    if (train = 1) then Y_train = Y;
    else Y_train = .;

    /* Dummy Variables from Assignment 5 */

    * A1 Base category is A1=a;
    A1_b = (A1 = 'b');

    * A4 Base category is: l,y;
    A4_u = (A4 = 'u');

    * A5 Base category is: gg,p;
    A5_g = (A5 = 'g');

    * A6 Base category is: d,e,j,r;
    A6_aa = (A6 = 'aa');
    A6_c  = (A6 = 'c');
    A6_cc = (A6 = 'cc');
    A6_ff = (A6 = 'ff');
    A6_i  = (A6 = 'i');
    A6_k  = (A6 = 'k');
    A6_m  = (A6 = 'm');
    A6_q  = (A6 = 'q');
    A6_w  = (A6 = 'w');
    A6_x  = (A6 = 'x');

    * A7 Base category is: dd,j,n,o,z;
    A7_bb = (A7 = 'bb');
    A7_ff = (A7 = 'ff');
    A7_h  = (A7 = 'h');
    A7_v  = (A7 = 'v');

    A9_t  = (A9 = 't');
    A10_t = (A10 = 't');
    A12_t = (A12 = 't');

    * A13 Base category is: p,s;
    A13_g = (A13 = 'g');

    * Discretize the Continuous Variables;
    if (A2 < 20) then A2_discrete = 1;
    else if (A2 < 25) then A2_discrete = 2;
    else if (A2 < 35) then A2_discrete = 3;
    else if (A2 < 45) then A2_discrete = 4;
    else A2_discrete = 5;

    if (A3 < 1) then A3_discrete = 1;
    else if (A3 < 2) then A3_discrete = 2;
    else if (A3 < 5) then A3_discrete = 3;
    else if (A3 < 10) then A3_discrete = 4;
    else A3_discrete = 5;

    if (A8 < 0.5) then A8_discrete = 1;
    else if (A8 < 1) then A8_discrete = 2;
    else if (A8 < 2) then A8_discrete = 3;
    else if (A8 < 5) then A8_discrete = 4;
    else if (A8 < 10) then A8_discrete = 5;
    else A8_discrete = 6;

    if (A11 < 1) then A11_discrete = 1;
    else if (A11 < 3.01) then A11_discrete = 2;
    else A11_discrete = 3;

    if (A14 < 100) then A14_discrete = 1;
    else if (A14 < 150) then A14_discrete = 2;
    else if (A14 < 250) then A14_discrete = 3;
    else if (A14 < 350) then A14_discrete = 4;
    else A14_discrete = 5;

    if (A15 < 1.5) then A15_discrete = 1;
    else if (A15 < 50) then A15_discrete = 2;
    else if (A15 < 100) then A15_discrete = 3;
    else if (A15 < 200) then A15_discrete = 4;
    else if (A15 < 4000) then A15_discrete = 5;
    else A15_discrete = 6;

    * Delete the observations with missing values;
    * Kept at the end of the step so the random draw above is made for every input row;
    if (A1 = '?') or (A4 = '?') or (A5 = '?') or (A6 = '?') or (A7 = '?')
       or (A2 = .) or (A3 = .) or (A8 = .) or (A11 = .) or (A14 = .) or (A15 = .)
    then delete;
run;

* Model 1: backward selection over all candidate predictors, fit on training rows only;
proc logistic data=temp descending;
    model Y_train = A2 A3 A8 A11 A14 A15
                    A1_b A4_u A5_g
                    A6_aa A6_c A6_cc A6_ff A6_i A6_k A6_m A6_q A6_w A6_x
                    A7_bb A7_ff A7_h A7_v
                    A9_t A10_t A12_t A13_g
          / selection=backward;
    output out=model_data pred=yhat;
run;

* Model 2: fixed three-variable model, fit on training rows only;
* model_data2 contains every row of temp, so it is the source for both partitions below;
proc logistic data=temp descending;
    model Y_train = A9_t A2 A3;
    output out=model_data2 pred=yhat;
run;

/*---------------------------------------------------------------------------
  %make_lift_chart: score one partition of model_data2 and plot its lift chart.

    train_flag=   value of TRAIN to keep (1 = training rows, 0 = test rows)
    scores=       output data set from PROC RANK (rows plus score_decile)
    pm=           output data set from PROC MEANS (successes per decile)
    chart=        output data set holding the cumulative lift-chart columns
    chart_title=  title text for the plot (must not contain commas)
---------------------------------------------------------------------------*/
%macro make_lift_chart(train_flag=, scores=, pm=, chart=, chart_title=);

    /* DESCENDING assigns the highest model scores to the lowest score_decile */
    proc rank data=model_data2 out=&scores descending groups=10;
        var yhat;
        ranks score_decile;
        where train = &train_flag;
    run;

    /* Successes per decile (_type_=1) plus the overall total (_type_=0) */
    proc means data=&scores sum;
        class score_decile;
        var Y;
        output out=&pm sum(Y)=Y_Sum;
    run;

    proc print data=&pm;
    run;

    data &chart;
        /* Total successes in this partition come from the overall (_type_=0)
           row instead of a hand-edited constant, so the chart stays correct
           when the sample or the partition changes. */
        if _n_ = 1 then
            set &pm (where=(_type_ = 0) keep=_type_ Y_Sum
                     rename=(Y_Sum=total_successes));

        set &pm (where=(_type_ = 1));

        Nobs = _freq_;
        score_decile = score_decile + 1;   /* PROC RANK groups are 0-9; shift to 1-10 */

        /* Sum statements: start at 0 and are retained across rows */
        cum_obs + Nobs;
        model_pred + Y_Sum;

        if total_successes > 0 then pred_rate = model_pred / total_successes;
        else pred_rate = .;

        base_rate = score_decile * 0.1;
        lift = pred_rate - base_rate;

        drop _freq_ _type_ total_successes;
    run;

    proc print data=&chart;
    run;

    title "&chart_title";

    proc gplot data=&chart;
        plot pred_rate*base_rate base_rate*base_rate
             / overlay legend=legend1 vaxis=axis1 haxis=axis2;
    run;
    quit;

%mend make_lift_chart;

ods graphics on;

* Global graph settings shared by both lift charts;
axis1 label=(angle=90 '% Captured from Target Population');
axis2 label=('Total Population');
legend1 label=(color=black height=1 '')
        value=(color=black height=1 'Model #2' 'Random Guess');
symbol1 color=green interpol=join w=2 value=dot height=1;
symbol2 color=black interpol=join w=2 value=dot height=1;

* In-sample (training, about 70 percent) results;
%make_lift_chart(train_flag=1, scores=training_scores, pm=pm_out,
                 chart=lift_chart,
                 chart_title=Model #2: In-Sample Lift Chart);

* Out-of-sample (test, about 30 percent) results;
%make_lift_chart(train_flag=0, scores=test_scores, pm=pm_out_test,
                 chart=lift_chart_test,
                 chart_title=Model #2: Out-of-Sample Lift Chart);

ods graphics off;
... View more