In case anyone is still interested in this topic, I have resolved the problem of the variance of the random-effect solution (as given by ods output solutionr= in Proc Mixed) being somewhat (and sometimes a lot) less than the variance given by its corresponding covparm (as given by ods output covparms=). I did it by developing another simulation for a study of repeated measurements, where each subject (athletes) has several tests in one season followed by several tests in the next season. The random-effect structure is given by random int Season/subject=Athlete;. This model estimates differences between athletes (the solution for the random Intercept/subject=Athlete, changes within athletes between seasons (the solution for random Season/subject=Athlete), and changes within athletes within seasons (the residuals). The individual solution values for the random effects all have standard errors, and it turns out that the variance of the solution plus the mean standard error squared is equal to the variance given by the covparms. Even the residuals must have standard errors, but I don't know how to estimate those and I assume they account for the difference between the variance of the residuals and residual variance given by the covparms. See below for the code for this simulation. I included lots of code to check that the random-effect solution produces unbiased estimates, and that the 90% compatibility intervals for each solution value did indeed include the true (simulated) value 90% of the time. I could have done it all with the previous code for individual responses, but I wanted to simulate repeated testing of athletes for a project I am helping with at Olympiatoppen (the Norwegian Olympic Sports Center). I'll be using the residuals and possibly the random-effect solution for Season as linear predictors in another mixed model. Their linear effects will be attenuated by the standard errors in each value. These standard errors vary a little bit, depending on the number of repeated measurements each athlete has, but the mean will be given by the difference in the variances described above. I don't have to calculate it, because all I am interested in is the correction for attenuation of a linear predictor that has error; the correction factor is given by 1/ICC (ICC = intraclass correlation coefficient), and the ICC is given by (pure variance)/(observed variance), which here is the variance of the random-effect solution divided by the covparm variance. I won't be allowing negative variances with the upcoming data, so there won't be any problem with interpreting the random-effect solution. Will *simulates repeated tests within each of two seasons;
*some of these macro variables are irrelevant here, copied from another program;
%let Ssize=100; *number of subjects;
%let BtwnSD=4; *true differences between subjects, to be estimated by a random effect;
%let Mean=100; *true grand mean, irrelevant here;
%let WthnErr=2; *error within subjects between tests within seasons, to be estimated by the residual;
%let BtwnErr=1; *extra error within subjects between seasons, to be estimated by a random effect;
%let MaxTestsWthn=4; *maximum number of tests in each season;
%let alpha=0.1;
%let nob=; *make it nob=bound to allow negative variance;
%let convcrit=CONVH=1E-8 convf=1E-8; *make the values 1E-7, 1E-6 or smaller if fail to converge;
%let tvalue=3.5; *residual standardized threshold for outliers;
%let logflag=0; *set to 1 for log transformation of the dependent;
%let deceff=1; *decimal places for effects and SDs;
data dat1;
TestID=0;
do Athlete=1 to &Ssize;
YtrueRand=&BtwnSD*rannor(0);
do Season=1;
BtwnSeasErr=&BtwnErr*rannor(0);
do TestNo=1 to 2+int((&MaxTestsWthn-1)*ranuni(0));
WthnSeasErr=rannor(0)*&WthnErr;
Yobsvd=&Mean+YtrueRand+BtwnSeasErr+WthnSeasErr;
TestID=TestID+1;
output;
end;
end;
do Season=2;
BtwnSeasErr=&BtwnErr*rannor(0);
if ranuni(0)>0.5 then do; *this gives 50% chance of any values in Season 2;
*if ranuni(0)>0 then do; *always have values in Season 2;
*do TestNo=1 to 2+int((&MaxTestsWthn-1)*ranuni(0)); *on average, as many as in Season 1;
do TestNo=1 to 1+int((&MaxTestsWthn-1)*ranuni(0)); *sometimes only one test in Season 2;
WthnSeasErr=rannor(0)*&WthnErr;
Yobsvd=&Mean+YtrueRand+BtwnSeasErr+WthnSeasErr;
TestID=TestID+1;
output;
end;
end;
end;
end;
title "Values for first three athletes";
proc print data=dat1;
where Athlete<4;
format YtrueRand BtwnSeasErr WthnSeasErr Yobsvd 5.2;
run;
ods select none;
title "Repeated tests over two seasons, mixed model";
proc mixed data=dat1 covtest cl alpha=&alpha &nob &convcrit;
class Athlete Season;
model Yobsvd=/s outp=pred residual alphap=&alpha ddfm=sat ;
random int Season/subject=Athlete s cl alpha=α * type=un;*random intercepts and slopes model;
*lsmeans Mean Gender/cl alpha=α
*estimate "Male/female" Gender 1 -1/cl alpha=α
ods output covparms=cov;
ods output estimates=est;
ods output lsmeans=lsm;
ods output solutionr=solr;
ods output solutionf=solf;
ods output classlevels=clev;
*by Sport;
*where Sport="Endur";
run;
ods select all;
data cov1;
set cov;
DegFree=2*Zvalue**2;
title2 "Random effects as variances";
title3 "True values: Intercept=&BtwnSD**2; Season=&BtwnErr**2; Residual=&WthnErr**2";
proc print data=cov1;
format _numeric_ 5.&deceff DegFree 5.0;
run;
title2 "Variance of residuals";
proc means data=pred noprint;
var resid;
output out=pred1(drop=_type_ _freq_) n=NoOfObs var=Variance;
run;
proc sort data=solr;
by Effect;
title2 "First 10 observations of Intercept random-effect solution";
proc print data=solr(obs=10);
where Effect="Intercept";
run;
title2 "First 10 observations of Season random-effect solution";
proc print data=solr(obs=10);
where Effect="Season";
run;
data solr0;
set solr;
SEsq=StdErrPred**2;
title2 "Variance of random-effect solutions";
proc means data=solr0 noprint;
var estimate SEsq;
by Effect;
output out=solr1(drop=_type_ _freq_ d) n=NoOfObs var=Variance mean=d SEsq;
where estimate ne 0;
run;
*proc print;run;
data solresid;
set solr1 pred1;
if Effect="" then Effect="Residual";
data all;
merge solresid cov1(keep=estimate degfree rename=(Estimate=CovParmVariance));
VarianceAdjustDF=Variance*(NoOfObs-1)/DegFree;
VarPlusSEsq=Variance+SEsq;
EstimatedSEsq=CovParmVariance-Variance;
CorrAttenFactor=CovParmVariance/Variance;
title2 "Variance of random-effect solutions and residuals, and covparm variance";
title3 "SEsq is the mean of StdErrPred squared of the solutions";
title4 "The covparm variance is evidently the sum of the variance and SEsq";
title5 "Hence the estimate of SEsq for the residual variance (I can't estimate error in each residual)";
title6 "When a random-effect solution or residuals are used as linear predictors, their effects";
title7 "will need to be corrected upwards by CorrAttenFactor = CovParmVariance/Variance = 1/ICC.";
title8 "Variance adjusted for DF does not work: it's always greater than the covparm variance";
proc print;
var Effect NoOfObs Variance SEsq VarPlusSEsq CovParmVariance EstimatedSEsq
DegFree VarianceAdjustDF CorrAttenFactor;
format _numeric_ 5.&deceff CorrAttenFactor 5.2 DegFree 5.0;
run;
title2 "Check individual true values of Season against random-effect solution";
*proc print data=solr;run;
data season;
set solr;
if Effect="Season";
keep Athlete Estimate Season Lower Upper;
*proc print;run;
proc sort data=dat1;
by Athlete Season;
data dat2;
set dat1;
if lag(Athlete) ne Athlete or lag(Season) ne Season;
keep Athlete Season BtwnSeasErr;
*proc print;
run;
data season1;
merge season dat2;
by Athlete Season;
if BtwnSeasErr ne .; *needed when a subject has no test in Season 2;
if BtwnSeasErr<Lower or BtwnSeasErr>Upper then Type0error="***";
*proc print;run;
title3 "Type0 error (i.e., when 90%CI for individual estimates does not include true value)";
title4 "Expected value is 10%";
proc freq;
tables Type0error/missing nocum;
run;
title3 "Expected true BtwnSeasErr: StdDev=&BtwnErr; variance=&BtwnErr**2";
proc means n mean std var maxdec=&deceff;
var Estimate BtwnSeasErr;
run;
title3 "True value (BtwnSeasErr) vs solution (Estimate)";
title4 "Line of identity shown in black, indicating no bias in the estimated value";
title5 "(Rerun the analysis to show that any apparent bias is just sampling variation.)";
ods graphics / reset width=14cm height=12cm imagemap attrpriority=none;
proc sgplot data=season1; *uniform=all;
styleattrs
datacolors=(red blue white)
datacontrastcolors=(black)
datasymbols=(circlefilled); *needs a group= for styleattrs to work;
*scatter x=pred y=Resid/markerattrs=(size=6 symbol=circlefilled) filledoutlinedmarkers
group=PossessionType;
scatter x=Estimate y=BtwnSeasErr/markerattrs=(size=8 color=black symbol=circlefilled) filledoutlinedmarkers MARKERFILLATTRS=(color=lightgreen); * group=Gender;
scatter x=BtwnSeasErr y=BtwnSeasErr/markerattrs=(size=3 color=black symbol=circlefilled);
*scatter x=pred y=Resid/markerattrs=(size=8 color=black symbol=circlefilled) filledoutlinedmarkers MARKERFILLATTRS=(color=lightgreen);
*scatter x=StrokeNo y=Resid/markerattrs=(size=3 color=black symbol=circlefilled) filledoutlinedmarkers MARKERFILLATTRS=(color=black);
refline 0/axis=y lineattrs=(pattern=dot thickness=1 color=black);
refline 0/axis=x lineattrs=(pattern=dot thickness=1 color=black);
reg x=Estimate y=BtwnSeasErr/degree=1 lineattrs=(pattrn=solid thickness=1 color=blue) legendlabel="linear" nomarkers;
*reg x=pred y=Resid/degree=2 lineattrs=(thickness=1 color=red) legendlabel="quadratic" nomarkers;
*by Sport;
run;
ods graphics / reset;
... View more