As a result of simulation below some cell sizes are not sufficient for further analysis. Sometimes, one of the cell happen to have zero observations and the program quits, without completeing number of cycles (here 100). Is there any way I can force the program to continue cycles until it ends? And is there any way I can put constraints on the minum cell size. I keep getting this error:
ERROR: (execution) Matrix has not been set to a value.
operation : [ at line 3130 column 11
operands : t2, idx2, *LIT1051
T2 1200 rows 1 col (numeric)
idx2 0 row 0 col (type ?, size 0)
*LIT1051 1 row 1 col (numeric)
1
statement : ASSIGN at line 3130 column 9
Here is the simulation code.
option symbolgen;
** define parameters;
%let N = 300; ** total obs;
%let NumSamples = 100;
%let beta01 = 0;
%let beta02 = log(6/4);
%let beta11 = log(2); *relationship of covariate X with T=1;
%let beta12 = log(5); *relationship of covariate X with T=2;
%let alpha0 = 0; ** intercept, T=3 effect;
%let alpha1 = 0.2; ** T=1 effect;
%let alpha2 = 0.4; ** T=2 effect;
%let alphaX = 0.2; ** X effect;
** simulate data;
proc iml;
** assign variable names and allocate space for the data and parameters;
varNamesData={SampleID x t t1 t2 t3 y};
varNamesParms={SampleID N PN1 PN2 PN3 beta01 beta02 beta03 beta11 beta12 beta13 alpha0 alpha1 alpha2 alphaX varY};
TempSimData = J(&N, NCOL(varNamesData));
TempSimParms = J(1, NCOL(varNamesParms));
create SimData from TempSimData[c=varNamesData];
create SimParms from TempSimParms[c=varNamesParms];
** simulation loop;
do SampleID = 1 to &NumSamples;
call RANDSEED(0);
** allocate space and generate x;
x = J(&N, 1);
call RANDGEN(x, "NORMAL", 1, 1);
** define linear equations;
eta13 = &beta01 + &beta11 * x; *T=1 vs T=3;
eta23 = &beta02 + &beta12 * x; *T=2 vs T=3;
** find actual probabilities for subjects to be in each treatment level;
pi1 = exp(eta13) / (1 + exp(eta13) + exp(eta23));
pi2 = exp(eta23) / (1 + exp(eta13) + exp(eta23));
pi3 = 1 / (1 + exp(eta13) + exp(eta23));
** allocate space for treatment and actual probabilities in matrix form;
t = J(&N, 1);
p = J(&N, 3);
** fill the probability matrix from pi1, pi2, and pi3;
p[,1] = pi1;
p[,2] = pi2;
p[,3] = pi3;
** generate treatment levels;
call RANDGEN(t , "TABLE", p);
** create dummy variables for treatment levels;
t1 = J(&N, 1, 0);
t2 = J(&N, 1, 0);
t3 = J(&N, 1, 0);
idx1 = LOC(t=1);
t1[idx1]=1;
idx2 = LOC(t=2);
t2[idx2]=1;
idx3 = LOC(t=3);
t3[idx3]=1;
** allocate space for outcome and residuals;
y = J(&N,1);
epsilon = J(&N, 1);
** generate residuals such that variance of y will approximately be 1;
SigmaEpsilon = SQRT(1-sum(cov(&alpha1*t1||&alpha2*t2||&alphaX*x)));
call RANDGEN(epsilon, "NORMAL", 0, SigmaEpsilon);
** generate y;
y = &alpha0 + &alpha1*t1 + &alpha2*t2 + &alphaX*x + epsilon;
** create a temporary simulated data for each simulation loop;
TempSimData = J(&N, NCOL(varNamesData));
TempSimData[,1] = SampleID;
TempSimData[,2] = x;
TempSimData[,3] = t;
TempSimData[,4] = t1;
TempSimData[,5] = t2;
TempSimData[,6] = t3;
TempSimData[,7] = y;
setout SimData;
append from TempSimData;
** define additional parameters;
idxN1 = LOC(t=1);
N1 = COUNTN(t[idxN1]);
idxN2 = LOC(t=2);
N2 = COUNTN(t[idxN2]);
idxN3 = LOC(t=3);
N3 = COUNTN(t[idxN3]);
VarY = VAR(y);
PN1 = N1/&N;
PN2 = N2/&N;
PN3 = N3/&N;
** save temporary parameters for each simulation loop;
TempSimParms = J(1, NCOL(varNamesParms));
TempSimParms[,1] = SampleID;
TempSimParms[,2] = &N;
TempSimParms[,3] = PN1;
TempSimParms[,4] = PN2;
TempSimParms[,5] = PN3;
TempSimParms[,6] = &beta01;
TempSimParms[,7] = &beta02;
TempSimParms[,8] = &beta03;
TempSimParms[,9] = &beta11;
TempSimParms[,10] = &beta12;
TempSimParms[,11] = &beta13;
TempSimParms[,12] = &alpha0;
TempSimParms[,13] = &alpha1;
TempSimParms[,14] = &alpha2;
TempSimParms[,15] = &alphaX;
TempSimParms[,16] = varY;
setout SimParms;
append from TempSimParms;
end;
close SimData;
close SimParms;
quit;
... View more