Great discussion ! Here is a simple example showing the dangers of trusting the seed setting mechanism to get reproducible random sequences in SQL. Note that the only difference between the two queries is the order of the datasets mentioned in the inner join
data mydata1;
do cnt=1 to 10;
output;
end;
run;
proc sort data=mydata1 out=mydata2; by descending cnt; run;
%let seed = 27182818;
proc sql;
create table RandSQL1(drop=dumb) as
select mydata1.cnt,
streaminit(&seed) as dumb,
rand('uniform') as u,
rand('normal') as n
from mydata1 inner join mydata2 on mydata1.cnt=mydata2.cnt
order by cnt;
quit;
proc sql;
create table RandSQL2(drop=dumb) as
select mydata1.cnt,
streaminit(&seed) as dumb,
rand('uniform') as u,
rand('normal') as n
from mydata2 inner join mydata1 on mydata1.cnt=mydata2.cnt
order by cnt;
quit;
proc sql;
select a.cnt, a.u=b.u, a.n=b.n
from randsql1 as a, randsql2 as b
where a.cnt=b.cnt;
quit;
... View more