Hi,
I am currently working on a project where i converted a simulation from excel file to SAS EG. I have reduced the running time from 12hours (Excel VBA) to approx. 8 hours (SAS EG). However, my goal is to have this program run successfully under 1 hour.
Would like to know if this is possible?
Below are my codes:
data a (drop=i);
do i = 1 to 1000000;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
call symputx('sim_1'||left(n),sim_1,'g');
run;
%macro sim_2();
data source2 (keep= sim_ML sim_GL);
set source1;
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GLs = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
run;
%mend;
%macro mcarlo();
%do i = 1 %to 1000000;
%sim_2;
%if &i = 1 %then %do;
proc sql noprint;
create table sum_l_&i as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
%end;
%else %if &i > 1 %then %do;
proc sql noprint;
create view sum_l as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
proc append base=sum_l_1 data=sum_l;
run;
%end;
%end;
%mend;
Deeply appreciate for any help! Thank you.
Hi,
@1) no there will be 1 dataset with million observations.
this loop:
do _I_ = 1 to 1000000;
repeats your simulations million times (i.e. it replaces the macro)
this loop:
do point = 1 to nobs;
set source1 point = point nobs = nobs;
just loops through source1 dataset (as your macro %sim_2() was doing)
thisto lines:
sim_ML_final + sim_ML;
sim_GL_final + sim_GL;
are just "aggregators" and replaces proc sql.
@2) yes you need the call streaminit() [in fact also in the data step which generates dataset A]. The rand() function is random generator but if you don't set seed every time you run it you will get different results.
Here is test example of your code and the one proposed by me, on the example source1 dataset (with 9 elements). After running it 1000 times your approach run about 48.38 seconds and my code run about 0.055 sec. [btw. you had extra "s" in ``` sim_GLs = E*L; ``` in your code]. Here is the code:
data source1;
infile cards dlm = ",";
input ID $ P E L r n ho;
cards;
a1,0.5,10000,0.45,0.1,1,-0.3
a2,0.2,67795,0.55,0.6,2,0.55
a3,0.6,56781,0.66,-0.8,3,0.31
a4,0.5,10000,0.45,0.1,4,-0.3
a5,0.2,67795,0.55,0.6,5,0.55
a6,0.6,56781,0.66,-0.8,6,0.31
a7,0.5,10000,0.45,0.1,7,-0.3
a8,0.2,67795,0.55,0.6,8,0.55
a9,0.6,56781,0.66,-0.8,9,0.31
;
run;
/*
ID = Identity
P = range from 0 to 1
E = greater than 0
L = range from 0 to 1
r = range from -1 to 1
ho = range from -5 to 5
n = row number
*/
/* "MACRO" approach */
%let number_of_iterations = 1000;
%let t = %sysfunc(time());
data a (drop=i);
do i = 1 to &number_of_iterations.;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
call streaminit(12345); /* to make it reproducible */
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
call symputx('sim_1'||left(n),sim_1,'g');
run;
options nomprint;
%macro sim_2();
data source2 (keep= sim_ML sim_GL);
set source1;
call streaminit(12345); /* to make it reproducible */
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
run;
%mend;
%macro mcarlo();
%do i = 1 %to &number_of_iterations.;
%sim_2;
%if &i = 1 %then %do;
proc sql noprint;
create table sum_l_&i as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
%end;
%else %if &i > 1 %then %do;
proc sql noprint;
create view sum_l as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
proc append base=sum_l_1 data=sum_l;
run;
%end;
%end;
%mend;
%mcarlo()
%let t = %sysevalf( %sysfunc(time()) - &t. );
%put Macro version &=t.;
/* "DATA STEP" approach */
%let number_of_iterations = 1000;
%let t = %sysfunc(time());
data a (drop=i);
do i = 1 to &number_of_iterations.;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
call streaminit(12345); /* to make it reproducible */
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
/*call symputx('sim_1'||left(n),sim_1,'g');*/
run;
%let t = %sysfunc(time());
data sum_l_Bart (keep= sim_ML_final sim_GL_final n);
array sim_[&number_of_iterations.] _temporary_;
do until(eof);
set a end=eof;
sim_[n] = sim_1;
end;
call streaminit(12345); /* to make it reproducible */
do _I_ = 1 to &number_of_iterations.;
sim_ML_final = 0;
sim_GL_final = 0;
do point = 1 to nobs;
set source1 point = point nobs = nobs;
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
sim_ML_final + sim_ML;
sim_GL_final + sim_GL;
end;
n = _I_;
output;
end;
stop;
run;
%let t = %sysevalf( %sysfunc(time()) - &t. );
%put Datastep with loop &=t.;
Hope it help.
Bart
what is `source1`? any example? it's size?
Bart
Could you share some example of source1? so we could test and play with your code
First thing I would consider to do would be moving it all into one data step loop without macro at all.
I would use an array for dataset A (it's only 16MB) to avoid generating a million macrovariables.
You could use SASFILE statement (it opens a SAS data set and allocates enough buffers to hold the entire file in memory).
Eventually I would consider running it in parallel (here is simple tutorial you may use to learn how to do it with systask: http://www.mini.pw.edu.pl/~bjablons/SASpublic/Parallel-processing-in-BASE-SAS.sas) but as I wrote first try to put it all into one data step.
Btw. 1 observation - you are using rand() function but I don't see call streaminit() what makes your exercise not reproducible.
Bart
An example of source1 will be something like this:
ID, P,E,L,r,n,ho
a1,0.5,10000,0.45,0.1,1,-0.3
a2,0.2,67795,0.55,0.6,2,0.55
a3,0.6,56781,0.66,-0.8,3,0.31
ID = Identity
P = range from 0 to 1
E = greater than 0
L = range from 0 to 1
r = range from -1 to 1
ho = range from -5 to 5
n = row number
You may generate the data sample based on the criteria above. My 'source1' dataset is around 4,000 observations. Hope this helps.
one more thought, doing proc append million times doesn't seem to be very practical, maybe just create a milion of `sum_l_1 -- sum_l_1000000` and then combine them into 1 dataset by use SET statement option OPEN=DEFER, something like:
data ALL;
set sum_l_: OPEN=DEFER;
run;
Bart
I have considered this method but i do not want to overload the storage by creating a million datasets.
Hi,
try this (but start with smaller scale, e.g. 100):
data a (drop=i);
do i = 1 to 1000000;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
/*call symputx('sim_1'||left(n),sim_1,'g');*/
run;
data source2 (keep= sim_ML_final sim_GL_final);
array sim_[1000000] _temporary_;
do until(eof);
set a end=eof;
sim_[n] = sim_1;
end;
call streaminit(12345); /* to make it reproducible */
do _I_ = 1 to 1000000;
sum_ML_final = 0;
sum_GL_final = 0;
do point = 1 to nobs;
set source1 point = point nobs = nobs;
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
sim_ML_final + sim_ML;
sim_GL_final + sim_GL;
end;
output;
end;
stop;
run;
Bart
Thank you, will definitely try out this. Will keep you posted.
1. By looking at the code, this will create 1 million of datasets, is that right? Is there any other method without create 1 million datasets and would speed up the process?
2. For this code particularly, do i need to add it to my data a since there is a rand() function?
call streaminit(12345); /* to make it reproducible */
3. I have replied to the response below which explains my ultimate goal of this simulation exercise. Kindly refer to the below.
Appreciate your effort, Bart.
Hi,
@1) no there will be 1 dataset with million observations.
this loop:
do _I_ = 1 to 1000000;
repeats your simulations million times (i.e. it replaces the macro)
this loop:
do point = 1 to nobs;
set source1 point = point nobs = nobs;
just loops through source1 dataset (as your macro %sim_2() was doing)
thisto lines:
sim_ML_final + sim_ML;
sim_GL_final + sim_GL;
are just "aggregators" and replaces proc sql.
@2) yes you need the call streaminit() [in fact also in the data step which generates dataset A]. The rand() function is random generator but if you don't set seed every time you run it you will get different results.
Here is test example of your code and the one proposed by me, on the example source1 dataset (with 9 elements). After running it 1000 times your approach run about 48.38 seconds and my code run about 0.055 sec. [btw. you had extra "s" in ``` sim_GLs = E*L; ``` in your code]. Here is the code:
data source1;
infile cards dlm = ",";
input ID $ P E L r n ho;
cards;
a1,0.5,10000,0.45,0.1,1,-0.3
a2,0.2,67795,0.55,0.6,2,0.55
a3,0.6,56781,0.66,-0.8,3,0.31
a4,0.5,10000,0.45,0.1,4,-0.3
a5,0.2,67795,0.55,0.6,5,0.55
a6,0.6,56781,0.66,-0.8,6,0.31
a7,0.5,10000,0.45,0.1,7,-0.3
a8,0.2,67795,0.55,0.6,8,0.55
a9,0.6,56781,0.66,-0.8,9,0.31
;
run;
/*
ID = Identity
P = range from 0 to 1
E = greater than 0
L = range from 0 to 1
r = range from -1 to 1
ho = range from -5 to 5
n = row number
*/
/* "MACRO" approach */
%let number_of_iterations = 1000;
%let t = %sysfunc(time());
data a (drop=i);
do i = 1 to &number_of_iterations.;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
call streaminit(12345); /* to make it reproducible */
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
call symputx('sim_1'||left(n),sim_1,'g');
run;
options nomprint;
%macro sim_2();
data source2 (keep= sim_ML sim_GL);
set source1;
call streaminit(12345); /* to make it reproducible */
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * &&sim_1&i + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
run;
%mend;
%macro mcarlo();
%do i = 1 %to &number_of_iterations.;
%sim_2;
%if &i = 1 %then %do;
proc sql noprint;
create table sum_l_&i as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
%end;
%else %if &i > 1 %then %do;
proc sql noprint;
create view sum_l as select sum(sim_ML) as sum_ML, sum(sim_GL) as sum_GL, &i as n from source2;
quit;
proc append base=sum_l_1 data=sum_l;
run;
%end;
%end;
%mend;
%mcarlo()
%let t = %sysevalf( %sysfunc(time()) - &t. );
%put Macro version &=t.;
/* "DATA STEP" approach */
%let number_of_iterations = 1000;
%let t = %sysfunc(time());
data a (drop=i);
do i = 1 to &number_of_iterations.;
n = i;
output;
end;
run;
data a(drop=w x y af1 af2);
set a;
call streaminit(12345); /* to make it reproducible */
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_1 = af1;
/*call symputx('sim_1'||left(n),sim_1,'g');*/
run;
%let t = %sysfunc(time());
data sum_l_Bart (keep= sim_ML_final sim_GL_final n);
array sim_[&number_of_iterations.] _temporary_;
do until(eof);
set a end=eof;
sim_[n] = sim_1;
end;
call streaminit(12345); /* to make it reproducible */
do _I_ = 1 to &number_of_iterations.;
sim_ML_final = 0;
sim_GL_final = 0;
do point = 1 to nobs;
set source1 point = point nobs = nobs;
do until (w_m<1);
x_m = 2 *rand("Uniform") - 1;
y_m = 2 *rand("Uniform") - 1;
w_m= x_m**2 + y_m**2;
end;
w_m = Sqrt((-2 * (Log(w_m))) / w_m);
af1_m = 1 * x_m * w_m + 0;
af2_m = 1 * y_m * w_m + 0;
sim_m1 = af1_m;
sim_m2 = af2_m;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0;
end;
do until (w<1);
x = 2 *rand("Uniform") - 1;
y = 2 *rand("Uniform") - 1;
w = x**2 + y**2;
end;
w = Sqrt((-2 * (Log(w))) / w);
af1 = 1 * x * w + 0;
af2 = 1 * y * w + 0;
sim_g1 = af1;
sim_g2 = af2;
if mod(n,2)=0 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0;
end;
else
if mod(n,2)=1 then do;
if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0;
end;
sim_ML_final + sim_ML;
sim_GL_final + sim_GL;
end;
n = _I_;
output;
end;
stop;
run;
%let t = %sysevalf( %sysfunc(time()) - &t. );
%put Datastep with loop &=t.;
Hope it help.
Bart
And here is the log for million for 4000 obs:
1 data source0; 2 infile cards dlm = ","; 3 input ID $ P E L r n ho; 4 cards; NOTE: The data set WORK.SOURCE0 has 10 observations and 7 variables. NOTE: DATA statement used (Total process time): real time 0.00 seconds cpu time 0.00 seconds 15 ; 16 run; 17 18 data source1; 19 do _N_ = 1 to 400; 20 do point = 1 to nobs; 21 set source0 point = point nobs = nobs; 22 output; 23 end; 24 end; 25 stop; 26 run; NOTE: The data set WORK.SOURCE1 has 4000 observations and 7 variables. NOTE: DATA statement used (Total process time): real time 0.01 seconds cpu time 0.00 seconds 27 28 29 /* 30 ID = Identity 31 P = range from 0 to 1 32 E = greater than 0 33 L = range from 0 to 1 34 r = range from -1 to 1 35 ho = range from -5 to 5 36 n = row number 37 */ 38 39 40 %let number_of_iterations = 1000000; 41 42 %let t = %sysfunc(time()); 43 data a (drop=i); 44 do i = 1 to &number_of_iterations.; 45 n = i; 46 output; 47 end; 48 run; NOTE: The data set WORK.A has 1000000 observations and 1 variables. NOTE: DATA statement used (Total process time): real time 0.04 seconds cpu time 0.04 seconds 49 data a(drop=w x y af1 af2); 50 set a; 51 call streaminit(12345); /* to make it reproducible */ 52 do until (w<1); 53 x = 2 *rand("Uniform") - 1; 54 y = 2 *rand("Uniform") - 1; 55 w = x**2 + y**2; 56 end; 57 w = Sqrt((-2 * (Log(w))) / w); 58 af1 = 1 * x * w + 0; 59 af2 = 1 * y * w + 0; 60 sim_1 = af1; 61 /*call symputx('sim_1'||left(n),sim_1,'g');*/ 62 run; NOTE: There were 1000000 observations read from the data set WORK.A. NOTE: The data set WORK.A has 1000000 observations and 2 variables. NOTE: DATA statement used (Total process time): real time 0.20 seconds cpu time 0.20 seconds 63 64 %let t = %sysfunc(time()); 65 data sum_l_Bart (keep= sim_ML_final sim_GL_final n); 66 67 array sim_[&number_of_iterations.] _temporary_; 68 do until(eof); 69 set a end=eof; 70 sim_[n] = sim_1; 71 end; 72 73 call streaminit(12345); /* to make it reproducible */ 74 75 do _I_ = 1 to &number_of_iterations.; 76 77 sum_ML_final = 0; 78 sum_GL_final = 0; 79 80 do point = 1 to nobs; 81 set source1 point = point nobs = nobs; 82 do until (w_m<1); 83 x_m = 2 *rand("Uniform") - 1; 84 y_m = 2 *rand("Uniform") - 1; 85 w_m= x_m**2 + y_m**2; 86 end; 87 w_m = Sqrt((-2 * (Log(w_m))) / w_m); 88 af1_m = 1 * x_m * w_m + 0; 89 af2_m = 1 * y_m * w_m + 0; 90 sim_m1 = af1_m; 91 sim_m2 = af2_m; 92 93 94 if mod(n,2)=0 then do; 95 if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m1 < P then sim_ML = E*L; else sim_ML = 0; 96 end; 97 else 98 if mod(n,2)=1 then do; 99 if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_m2 < P then sim_ML = E*L; else sim_ML = 0; 100 end; 101 102 do until (w<1); 103 x = 2 *rand("Uniform") - 1; 104 y = 2 *rand("Uniform") - 1; 105 w = x**2 + y**2; 106 end; 107 w = Sqrt((-2 * (Log(w))) / w); 108 af1 = 1 * x * w + 0; 109 af2 = 1 * y * w + 0; 110 sim_g1 = af1; 111 sim_g2 = af2; 112 113 if mod(n,2)=0 then do; 114 if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g1 < P then sim_GL = E*L; else sim_GL = 0; 115 end; 116 else 117 if mod(n,2)=1 then do; 118 if r * ho * sim_[_I_] + sqrt(1-(r*ho)**2)*sim_g2 < P then sim_GL = E*L; else sim_GL = 0; 119 end; 120 121 sim_ML_final + sim_ML; 122 sim_GL_final + sim_GL; 123 end; 124 n = _I_; 125 output; 126 end; 127 128 stop; 129 run; NOTE: There were 1000000 observations read from the data set WORK.A. NOTE: The data set WORK.SUM_L_BART has 1000000 observations and 3 variables. NOTE: DATA statement used (Total process time): real time 39:54.36 cpu time 39:52.71 130 131 %let t = %sysevalf( %sysfunc(time()) - &t. ); 132 %put Datastep with loop &=t.; Datastep with loop T=2394.3770000934
Bart
Hi Bart,
I have tested out the code and it works fine except for one thing:
The output variables (sim_ML_final and sim_GL_final) increase as n increases. Can you explain the reason behind it?
Hi,
My fault, sorry for that, I put the code with spelling it was:
sum_ML_final = 0;
sum_GL_final = 0;
and it should be:
sim_ML_final = 0;
sim_GL_final = 0;
sorry for that. I've corrected the answer I posted.
Bart
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
What’s the difference between SAS Enterprise Guide and SAS Studio? How are they similar? Just ask SAS’ Danny Modlin.
Find more tutorials on the SAS Users YouTube channel.