Hi,
I wrote a simulation in SAS/IML but I am having trouble getting the ASD (Approximation Sample Distribution) by group within SAS IML using some of the [,:] function. Any help would be greatly appreciated. My other questions as referenced in the codes are as follow:
Question 1) Is there a better to write this loop. I am looping over time and there are two replicate samples. The code looks funny and I wondering if there is a better way to write it so that it.
Question 2) I have used an example from the Simulating data with SAS book and used it create my IDs. I just wonder if this is the write way to do it.
Question 3) How to create an ordered matrix that one can readily write into a SAS dataset? In my example, I have had to create the dataset then read it as a matrix, then sort it before creating the final dataset.
Question 4). This is the main question. I wanted to know how to get the ASD by groups (i.e by replicate samples, by time and by replicate samples and by time) from the matrix directly within SAS IML without having to do it using proc means and proc univariate.
Data param;
input replicate pr_pa0 pr_pa0_ith or_pa_ith;
datalines;
1 0.30 0.40 2.0
2 0.20 0.33 1.4
;
run;
proc iml;
/*Functions*/
Start logit(Pr);
n = log(Pr/(1-Pr));
return(n);
finish;
/*parameters*/
use param;
read all var _ALL_;
close param;
rep=2; *number of replications;
N =5; *size each the sample;
time_length=10; *number of timepoints;
time = T( do(1, time_length, 1));
/*Baseline*/
pa0=j(rep,N);
call randgen(pa0, "Bernoulli", pr_pa0); /*initialize the obeservations at baseline*/
pa = pa0 // j(rep*(time_length-1), N,.); /*create a vector of empty cells to be filled in follow-up*/
/*Follow-up*/
pa_ith = j(rep, N); /*temporary variable to store ith value*/;
do i = 2 to time_length; /*Question 1*/
r=rep;
p=i-1;
call randgen(pa_ith, "Bernoulli", logistic(logit(pr_pa0_ith) + or_pa_ith#pa[(r*(p-1) + 1): r*p, ]));
pa[(r*p + 1): r*(p+1), ] = pa_ith;
end;
print pa;
/*creating an id variable*/ /*Question 2*/
id=repeat(1:N, rep*time_length,1);
timeid=repeat(T(1:time_length), 1, rep*N);
sampid=repeat(T(1:rep), time_length, N);
/*creating a dataset or sorting the dataset by sampid id timeid*/
create simreg var {sampid id timeid pa};
append;
close simreg;
use simreg;
read all var _NUM_ into m[colname=VarNames]; /*Question 3*/
close simreg;
call sort(m, {sampid id timeid });
create simreg2 from m [colname=varNames];
append from m;
close simreg2;
quit;
/*Getting the approximate simulation distribution*/
proc sort data=simreg2; by sampid timeid;run; /*Question 4*/
proc means data=simreg2 noprint;
by sampid timeid;
var pa;
output out=Outstats mean=Sample_mean;
run;
proc sort data=outstats; by timeid;
proc univariate data=outstats noprint;
by timeid;
var sample_mean;
output out=results
mean=Sample_mean
pctlpts=2.5 97.5
pctlpre =pctl_mean
std = sd_Mean;
run;
proc print data= results;
run;
Thank you