* Missing-value imputation using continuous variables;
/* Build the Fitness1 data set from the inline records below.
   Each record is an (Oxygen, RunTime, RunPulse) triple. The trailing @@
   holds the input line so that several triples can be read from one line.
   A lone period stands for a missing numeric value. */
data Fitness1;
input Oxygen RunTime RunPulse @@;
datalines;
44.609 11.37 178 45.313 10.07 185
54.297 8.65 156 59.571 . .
49.874 9.22 . 44.811 11.63 176
. 11.95 176 . 10.85 .
39.442 13.08 174 60.055 8.63 170
50.541 . . 37.388 14.03 186
44.754 11.12 176 47.273 . .
51.855 10.33 166 49.156 8.95 180
40.836 10.95 168 46.672 10.00 .
46.774 10.25 . 50.388 10.08 168
39.407 12.63 174 46.080 11.17 156
45.441 9.63 164 . 8.92 .
45.118 11.08 . 39.203 12.88 168
45.790 10.47 186 50.545 9.93 148
48.673 9.40 186 47.920 11.50 170
47.467 10.50 170
;
/* Multiply impute the missing values of the three Fitness1 variables.
   SEED= fixes the random number stream, NOPRINT suppresses the displayed
   output, and OUT= stores the completed data in outmi (the later steps
   process it BY _Imputation_). */
proc mi data=Fitness1
        out=outmi
        seed=3237851
        noprint;
   var Oxygen RunTime RunPulse;
run;
/* Fit the same regression separately within every imputed data set.
   INVERSE requests the X'X inverse matrix, and the ODS OUTPUT statement
   captures the estimates (glmparms) and that matrix (glmxpxi) as data sets
   for the later PROC MIANALYZE step. */
proc glm data=outmi;
   model Oxygen = RunTime RunPulse / inverse;
   by _Imputation_;
   ods output ParameterEstimates = glmparms
              InvXPX             = glmxpxi;
quit;
/* List the first six rows of the per-imputation coefficient table
   (intercept plus two regressors for each of the first two imputations). */
title 'GLM Model Coefficients (First Two Imputations)';
proc print data=glmparms(obs=6);
   var _Imputation_ Parameter Estimate StdErr;
run;

/* List the first eight rows of the captured X'X inverse matrices
   (presumably four rows per imputation - confirm against the output). */
title 'GLM X''X Inverse Matrices (First Two Imputations)';
proc print data=glmxpxi(obs=8);
   var _Imputation_ Parameter Intercept RunTime RunPulse;
run;
/* Pool the per-imputation regression results into one set of estimates.
   EDF=28 is the complete-data error degrees of freedom: Fitness1 holds
   31 records and the model has 3 parameters. The pooled estimates are
   saved to parms_1. */
proc mianalyze edf=28
               parms=glmparms
               xpxi=glmxpxi;
   modeleffects Intercept RunTime RunPulse;
   ods output ParameterEstimates = parms_1;
run;
* Score Fitness1 with the pooled coefficients taken from the parms_1 data set;
/* The coefficients are read from the PROC MIANALYZE output (parms_1) rather
   than typed in by hand, so they always match the imputation run that was
   actually executed (seed, data and number of imputations).
   PROC TRANSPOSE turns the one-row-per-parameter table into a single row
   whose columns are named after the Parm values (Intercept, RunTime,
   RunPulse). KEEP= retains only those coefficient columns. */
proc transpose data=parms_1
               out=parms_2 (keep=Intercept RunTime RunPulse);
   id Parm;
   var Estimate;
run;

/* Add the identifying columns PROC SCORE expects in a SCORE= data set:
   _TYPE_ selects the coefficient row (matched by TYPE=PARMS below) and
   _MODEL_ supplies the name of the new score variable. */
data parms_2;
   set parms_2;
   _MODEL_ = 'missing';
   _TYPE_  = 'PARMS';
run;

/* Apply the coefficients to every Fitness1 record. The result is written to
   RScoreP. */
proc score data=Fitness1 score=parms_2 out=RScoreP type=parms;
   var RunTime RunPulse;
run;
* Missing-value imputation including a categorical variable;
/* Build the Fish2 data set from the inline records below.
   Each record is (Species, Length, Height, Width), with Species read as a
   character variable. The trailing @@ allows several records per line.
   A lone period is read as a missing value for both the character and the
   numeric variables. TITLE is a global statement, so the title it sets
   stays in effect for later output until it is replaced. */
data Fish2;
title 'Fish Measurement Data';
input Species $ Length Height Width @@;
datalines;
Bream 30.0 11.520 4.020 . 31.2 12.480 4.306
Bream 31.1 12.378 4.696 Bream 33.5 12.730 4.456
. 34.0 12.444 . Bream 34.7 13.602 4.927
Bream 34.5 14.180 5.279 Bream 35.0 12.670 4.690
Bream 35.1 14.005 4.844 Bream 36.2 14.227 4.959
. 36.2 14.263 . Bream 36.2 14.371 4.815
Bream 36.4 13.759 4.368 Bream 37.3 13.913 5.073
Bream 37.2 14.954 5.171 Bream 37.2 15.438 5.580
Bream 38.3 14.860 5.285 Bream 38.5 14.938 5.198
. 38.6 15.633 5.134 Bream 38.7 14.474 5.728
Bream 39.5 15.129 5.570 . 39.2 15.994 .
Bream 39.7 15.523 5.280 Bream 40.6 15.469 6.131
. 40.5 . . Bream 40.9 16.360 6.053
Bream 40.6 16.362 6.090 Bream 41.5 16.517 5.852
Bream 41.6 16.890 6.198 Bream 42.6 18.957 6.603
Bream 44.1 18.037 6.306 Bream 44.0 18.084 6.292
Bream 45.3 18.754 6.750 Bream 45.9 18.635 6.747
Bream 46.5 17.624 6.371
Pike 34.8 5.568 3.376 Pike 37.8 5.708 4.158
Pike 38.8 5.936 4.384 . 39.8 . .
Pike 40.5 7.290 4.577 Pike 41.0 6.396 3.977
. 45.5 7.280 4.323 Pike 45.5 6.825 4.459
Pike 45.8 7.786 5.130 Pike 48.0 6.960 4.896
Pike 48.7 7.792 4.870 Pike 51.2 7.680 5.376
Pike 55.1 8.926 6.171 . 59.7 10.686 .
Pike 64.0 9.600 6.144 Pike 64.0 9.600 6.144
Pike 68.0 10.812 7.480
;
/* Multiply impute Fish2 with monotone methods: a regression method for
   Width and a discriminant method for the classification variable Species,
   using Length, Height and Width as its covariates. The VAR statement
   lists the variables in the order Length, Height, Width, Species.
   The completed data are written to outfish. */
proc mi data=Fish2
        out=outfish
        seed=1305417;
   class Species;
   monotone reg(Width)
            discrim(Species = Length Height Width);
   var Length Height Width Species;
run;
/* Fit the Length model within every imputed data set. SOLUTION requests
   the fixed-effect estimates and COVB their covariance matrix. Both tables
   are captured as data sets (mxparms, mxcovb); only mxparms is read by the
   later steps visible in this program. */
proc mixed data=outfish;
   class Species;
   model Length = Species Height Width / solution covb;
   by _Imputation_;
   ods output SolutionF = mxparms
              CovB      = mxcovb;
run;
/* List the first ten rows of the per-imputation fixed-effect estimates. */
title 'MIXED Model Coefficients (First Two Imputations)';
proc print data=mxparms(obs=10);
   var _Imputation_ Effect Species Estimate StdErr;
run;
/* Pool the PROC MIXED estimates across imputations. CLASSVAR=FULL states
   that the PARMS= data set identifies classification levels through the
   classification variable itself (the Species column of mxparms).
   The pooled estimates are saved to parms_1, replacing any earlier data
   set of that name. */
proc mianalyze parms(classvar=full)=mxparms;
   class Species;
   modeleffects Intercept Species Height Width;
   ods output ParameterEstimates = parms_1;
run;
/* Turn every pooled estimate into one text term of a scoring expression:
     Intercept          ->  <estimate>
     continuous effect  ->  <sign><estimate>*<Parm>
     Species level      ->  <sign><estimate>*(<Parm>='<level>')
   Non-negative estimates get an explicit "+" so that the terms can simply
   be concatenated later. Negative estimates already carry their sign. */
data parms_2;
   set parms_1;
   length pred_1 $3000.;

   if parm = 'Intercept' then
      pred_1 = compress(Estimate);
   else if Species = '' then do;
      /* Effect without a classification level: multiply by the variable. */
      if Estimate >= 0 then
         pred_1 = cats("+", Estimate, "*", compress(Parm));
      else
         pred_1 = cats(Estimate, "*", compress(Parm));
   end;
   else do;
      /* Classification level: multiply by a 0/1 comparison on the level. */
      if Estimate >= 0 then
         pred_1 = cats("+", Estimate, "*(", parm, "='", compress(Species), "')");
      else
         pred_1 = cats(Estimate, "*(", parm, "='", compress(Species), "')");
   end;
run;
/* Lay the expression terms out side by side in a single row of parms_3
   (one column per retained term). Rows whose estimate is exactly 0 are
   left out. */
proc transpose data=parms_2(where=(estimate ne 0))
               out=parms_3;
   var pred_1;
run;
/* Join the transposed terms (the variables whose names start with "col")
   into one expression and store it in the macro variable pred3.
   CALL SYMPUTX is used so that the trailing blanks of the $3000 work
   variable are not carried into the macro variable.
   The macro variable is echoed to the log only after the step has finished.
   A %PUT placed inside the DATA step would run while the step is being
   compiled, before CALL SYMPUTX has executed, and would therefore show an
   unresolved reference or a stale value. */
data _null_;
   set parms_3;
   length parms_1 $3000.;
   parms_1 = catt(of col:);
   call symputx('pred3', parms_1);
run;
%put &pred3;
/* Evaluate the generated scoring expression (macro variable pred3) for every
   Fish2 record and store the result in NEW_Length.
   NOTE(review): a record with missing Species makes every Species comparison
   in the expression evaluate to 0, and a missing Height or Width yields a
   missing NEW_Length - confirm that this is the intended handling. */
data MISSING;
   set Fish2;
   NEW_Length = &pred3;
run;