It behaves a bit like the simulated set below: I get 16 obs for this one, and the sample set is read 17 times. With my own dataset, using only the lower bound (0.8*sample_wage<=wage) nets the same number of matches, but adding the upper bound (0.8*sample_wage<=wage<=10*sample_wage) nets only 5 obs, even though I can count from the previous matches that there are definitely more than 5 matches... Any ideas? data Simulated;
/* Body of the SIMULATED data step: generates 100,000,000 synthetic rows.
   Each row gets
     i          row number (loop index),
     x, y       uniform draws from RANUNI used to assign the two categories,
     gender     'M' when x < .5, otherwise 'F',
     ethnicity  'B' when y < .5, otherwise 'W',
     wage       a normal draw whose mean and spread depend on the
                gender/ethnicity cell.
   The branches are written as exclusive IF/ELSE chains so that every row is
   assigned fresh values; with two independent IF statements a draw of exactly
   .5 would silently keep the previous row's ethnicity and wage. */

   /* Seed the RAND() stream so the wages are reproducible, like RANUNI(1). */
   call streaminit(1);

   do i = 1 to 100000000;
      x = ranuni(1);
      if x < .5 then gender = 'M';
      else gender = 'F';

      y = ranuni(1);
      if y < .5 then ethnicity = 'B';
      else ethnicity = 'W';

      /* Cell-specific wage distribution: mean + standard normal * spread. */
      if gender = 'M' and ethnicity = 'W' then
         wage = 50000 + rand("Normal") * 30000;
      else if gender = 'M' and ethnicity = 'B' then
         wage = 40000 + rand("Normal") * 21000;
      else if gender = 'F' and ethnicity = 'W' then
         wage = 45000 + rand("Normal") * 20000;
      else
         wage = 39000 + rand("Normal") * 15000;

      output;
   end;
run;
/* Draw a simple random sample of 500 rows from SIMULATED.
   The sampled columns are renamed with a sample_ prefix so they do not
   collide with the same-named columns of SIMULATED when both data sets are
   read in one data step later on.
   SEED= fixes the random selection so the same sample (and therefore the same
   matching behaviour) is obtained on every run. */
proc surveyselect data=simulated
      method=srs
      n=500
      seed=1
      out=SampleSRS (rename=(wage=sample_wage
                             gender=sample_gender
                             ethnicity=sample_ethnicity
                             i=sample_i));
run;
/* For every row of SAMPLESRS, find one row of SIMULATED whose wage lies in
   the band spanned by 0.9*sample_wage and 10*sample_wage, and write the
   combined row to MATCHED.

   Output columns: all sample_* columns, the matched SIMULATED columns
   (including its own i), j = number of SIMULATED rows inspected for this
   sample row, and found = 1.

   Why this differs from a plain nested SET:
   - A sequential SET ends the whole data step as soon as it runs past the
     last row of SIMULATED.  A single sample row without a match therefore
     silently discarded every remaining sample row.  SIMULATED is now read
     with POINT=, and each search is capped at one full pass (n_sim rows), so
     an unmatched sample row is skipped instead of terminating the step.
   - For a negative sample_wage, 0.9*sample_wage is GREATER than
     10*sample_wage, so the original band was empty and could never match.
     The two bounds are now ordered with MIN/MAX.
   - SIMULATED contains its own variable i, which overwrote the outer loop
     index.  The outer loop uses a private, dropped index instead.

   The search resumes after the previously matched row (as the sequential
   read did) and wraps around at the end of SIMULATED; after a wrap-around a
   SIMULATED row may be matched more than once. */
data matched;
   drop _s _lo _hi;

   _cursor = 0;                       /* last SIMULATED row that was read */

   do _s = 1 to n_sample;
      set samplesrs nobs=n_sample;

      /* Order the bounds so the band is well-formed for negative wages too. */
      _lo = min(sample_wage * 0.9, sample_wage * 10);
      _hi = max(sample_wage * 0.9, sample_wage * 10);

      found = 0;

      /* A missing sample_wage can never match; skip the scan entirely. */
      if not missing(sample_wage) then
         do j = 1 to n_sim until (found);
            _cursor = mod(_cursor, n_sim) + 1;   /* next row, wrapping to 1 */
            set simulated point=_cursor nobs=n_sim;
            if _lo <= wage <= _hi then do;
               found = 1;
               output;
            end;
         end;
   end;

   stop;   /* required: POINT= access never triggers end-of-file on its own */
run;
... View more