<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Binomial Confidence Intervals without dataset in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247356#M56300</link>
    <description>&lt;P&gt;You can approximate the solution by creating a dataset that has the properties and then using proc freq.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I've included the code below and, for comparison,&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;'s code so you can test your results.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this helps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Writes two rows: type='X' with x=floor(p*n), then type='Y' with x=n-x. */
n=2000;
p=0.7;
type='X';
X=floor(p*n); output;
type='Y'; x=n-x; output;

run;

/* Computes a normal-approximation interval directly from n and p, for comparison with the PROC FREQ output. */
data ci;
alpha=0.05;
 n=2000;
 p=0.7;                     /* Probability of success */
   se = sqrt(p*(1-p) / (n-1));  /* standard error; n is the sample size */
   z = quantile("Normal", 1-(alpha/2)); /* two-sided; if n is small, try Binomial */
   LowerCL = p - z*se;
   UpperCL = p + z*se;
  output;
 run;

/* x is the frequency weight of each type row; the BINOMIAL option is given p=0.7. */
proc freq data=have;
table type/binomial (p=0.7);
weight x;
run;
 &lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 02 Feb 2016 02:03:15 GMT</pubDate>
    <dc:creator>Reeza</dc:creator>
    <dc:date>2016-02-02T02:03:15Z</dc:date>
    <item>
      <title>Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247340#M56294</link>
      <description>&lt;P&gt;How can I calculate a confidence interval for a binomial distributed proportion without using a dataset?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is there a command equivalent to the Stata cii n Y command?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I have the sample size and probability of success, and I need to calculate the CI.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Mon, 01 Feb 2016 23:56:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247340#M56294</guid>
      <dc:creator>jlajla</dc:creator>
      <dc:date>2016-02-01T23:56:51Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247352#M56297</link>
      <description>&lt;P&gt;If your sample size is large, you can use the asymptotic normal distribution to calculate it. Otherwise, you need to calculate the quantile of the binomial distribution.&lt;/P&gt;
&lt;P&gt;Here is the code from Rick's blog.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="http://blogs.sas.com/content/iml/2015/10/28/simulation-exact-tables.html" target="_blank"&gt;http://blogs.sas.com/content/iml/2015/10/28/simulation-exact-tables.html&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE class="text"&gt;p = mean(x);                       /* P&lt;SPAN&gt;robability of success&lt;/SPAN&gt; */
   se = sqrt(p*(1-p) / (n-1));  /* standard error; n is &lt;SPAN&gt;the sample size &lt;/SPAN&gt; */
   z = quantile("Normal", 1-alpha/2); /* two-sided; if n is small, try Binomial */
   /* limits are p minus and plus z standard errors */
   LowerCL = p - z*se;
   UpperCL = p + z*se;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Feb 2016 01:33:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247352#M56297</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2016-02-02T01:33:47Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247355#M56299</link>
      <description>&lt;P&gt;great, thank you!&lt;/P&gt;</description>
      <pubDate>Tue, 02 Feb 2016 02:03:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247355#M56299</guid>
      <dc:creator>jlajla</dc:creator>
      <dc:date>2016-02-02T02:03:08Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247356#M56300</link>
      <description>&lt;P&gt;You can approximate the solution by creating a dataset that has the properties and then using proc freq.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I've included the code below and, for comparison,&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;'s code so you can test your results.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this helps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
/* Writes two rows: type='X' with x=floor(p*n), then type='Y' with x=n-x. */
n=2000;
p=0.7;
type='X';
X=floor(p*n); output;
type='Y'; x=n-x; output;

run;

/* Computes a normal-approximation interval directly from n and p, for comparison with the PROC FREQ output. */
data ci;
alpha=0.05;
 n=2000;
 p=0.7;                     /* Probability of success */
   se = sqrt(p*(1-p) / (n-1));  /* standard error; n is the sample size */
   z = quantile("Normal", 1-(alpha/2)); /* two-sided; if n is small, try Binomial */
   LowerCL = p - z*se;
   UpperCL = p + z*se;
  output;
 run;

/* x is the frequency weight of each type row; the BINOMIAL option is given p=0.7. */
proc freq data=have;
table type/binomial (p=0.7);
weight x;
run;
 &lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 02 Feb 2016 02:03:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247356#M56300</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2016-02-02T02:03:15Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247358#M56301</link>
      <description>&lt;P&gt;thank you. This is what I was considering doing. It seems it might be a bit more straightforward.&lt;/P&gt;</description>
      <pubDate>Tue, 02 Feb 2016 02:06:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/247358#M56301</guid>
      <dc:creator>jlajla</dc:creator>
      <dc:date>2016-02-02T02:06:57Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249567#M56530</link>
      <description>&lt;P&gt;It should be noted that the CI computed by the formula suggested is not identical to the similar CI computed by PROC FREQ.&amp;nbsp;The latter is the well-known Wald CI with denominator n in the radicand, whereas the formula copied from&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13684"&gt;@Rick_SAS﻿&lt;/a&gt;'s &lt;A href="http://blogs.sas.com/content/iml/2015/10/28/simulation-exact-tables.html" target="_blank"&gt;blog&lt;/A&gt; uses&amp;nbsp;denominator n-1.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The standard error of point estimator p (sample proportion of successes) of binomial parameter P is sqrt(P&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;(1-P)/n) (&lt;A href="http://www.wiley.com/WileyCDA/WileyTitle/productCd-0471526290.html" target="_blank"&gt;Fleiss et al., Statistical Methods for Rates and Proportions, 3rd ed.&lt;/A&gt;, p. 26).&amp;nbsp;That's where denominator n comes from.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Denominator n-1 could be justified by the fact that n&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;p&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;(1-p)/(n-1) is the uniform minimum variance unbiased estimator of P&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;(1-P) (&lt;A href="http://www.springer.com/us/book/9780387985022" target="_blank"&gt;Lehmann/Casella, Theory of Point Estimation, 2nd ed.&lt;/A&gt;, p. 100).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Obviously, with denominator n-1 the CI is wider, but only slightly so if n is large.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Just for curiosity I computed coverage probabilities for this CI ("CI2") and compared them to those of the Wald CI ("CI1") and of the Wald CI with continuity correction ("CI3"), p -/+ (z&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;sqrt(p&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;(1-p)/n)+1/(2&lt;FONT face="symbol"&gt;*&lt;/FONT&gt;n)) (Fleiss et al., p. 29; z=probit(0.975)), all for confidence level 0.95.&amp;nbsp;The investigation was restricted to 17&amp;lt;=n&amp;lt;=200 and P=k/10000 with integers k such that nP&amp;gt;=5 and n(1-P)&amp;gt;=5 (criterion from Fleiss et al., p. 26), a total of 1,590,280 pairs (n, P).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;U&gt;Some results:&lt;/U&gt;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Among the investigated 184 values of n, the proportion of cases (i.e. values of p) with coverage probabilities (CP) &amp;gt;=0.95 varied between 36.6% and 84.1% for CI3&amp;nbsp;(&amp;gt;=50% for each n&amp;gt;=24), but reached only 0.3 - 24.5% for CI1 and 4.5 - 28.1% for CI2.&lt;/LI&gt;
&lt;LI&gt;Ranges of minimum coverage probabilities were [0.873, 0.894] for CI1, [0.874, 0.899] for CI2 and still only [0.896, 0.914] for CI3, in spite of the restrictions nP&amp;gt;=5 and n(1-P)&amp;gt;=5.&lt;/LI&gt;
&lt;LI&gt;CP for CI1 and CI2 were equal in 83.6 - 94.8% of cases ("CP1=CP2").&lt;/LI&gt;
&lt;LI&gt;For each n (17, 18, ..., 200) there were &amp;gt;1.5% cases with CP1 &amp;lt; CP2 &amp;lt;= 0.95. For some n, e.g. n=34, this proportion exceeded 10%.&lt;/LI&gt;
&lt;LI&gt;The proportion of cases with CP1 &amp;lt; 0.95 &amp;lt; CP2 ranged from 1.9% to 11.1%. In most (&amp;gt;85%) of these cases CP2 was closer to 0.95 than CP1.&lt;/LI&gt;
&lt;LI&gt;Not a single case was observed in which 0.95 &amp;lt;= CP1 &amp;lt; CP2.&lt;/LI&gt;
&lt;LI&gt;In many cases (26.8 - 55.9%) CP2 was equal to CP3.&lt;/LI&gt;
&lt;LI&gt;Proportions of cases with CP2 &amp;lt; 0.95 &amp;lt; CP3 and CP3 closer to 0.95 had a similar range: 22.4 - 50.7%.&lt;/LI&gt;
&lt;LI&gt;The range of proportions of cases with 0.95 &amp;lt; CP2 &amp;lt; CP3 was only 0 - 8.0%.&lt;/LI&gt;
&lt;LI&gt;For only 16 values of n the proportion of cases with either CP1 or CP2 in ]0.94, 0.96[ and CP3 outside this interval was&amp;nbsp;greater than the proportion of cases showing the opposite behavior.&lt;/LI&gt;
&lt;LI&gt;Overall, the proportions of cases with CP in ]0.94, 0.96[ were 61.3%, 65.8% and 71.6%, respectively (CP1, CP2, CP3).&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;So, at least for 17&amp;lt;=n&amp;lt;=200 the CI with denominator n-1 has advantages over the standard Wald CI. In many of these cases a continuity correction makes sense.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;However, there are CIs for the binomial proportion with even better coverage properties without being overly conservative (as the Clopper-Pearson CI): see &lt;A href="http://www-stat.wharton.upenn.edu/~tcai/paper/Binomial-StatSci.pdf" target="_blank"&gt;Brown et al. (2001), Interval Estimation for&amp;nbsp;a Binomial Proportion&lt;/A&gt;, where&amp;nbsp;also some of the above results on CI1 (and many more) can be found.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Below (under the spoiler tag) is part of the code I used for my investigations.&lt;/P&gt;
&lt;LI-SPOILER&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;%macro binci(n);

/* binci(n): for sample size n, builds three 95 percent intervals for every
   outcome k=0..n, sums the binomial probabilities of the outcomes whose
   interval contains p to obtain coverage probabilities on a grid of p values,
   then prints summary statistics and plots coverage against p.
   Parameter n: number of Bernoulli trials, substituted as text wherever the
   macro variable n is referenced.
   Creates datasets ci95, tmp and covprob and does not delete them. */

/* Compute the three 95% CIs for all possible outcomes of n Bernoulli samples */

data ci95;
do k=0 to &amp;amp;n;
  p0=k/&amp;amp;n;
  z=probit(0.975);
  /* interval 1: half-width uses denominator n */
  d=z*sqrt(p0*(1-p0)/&amp;amp;n);
  pL1=p0-d;
  pU1=p0+d;
  /* interval 3: interval 1 half-width widened by 1/(2n) */
  d=d+1/(2*&amp;amp;n);
  pL3=p0-d;
  pU3=p0+d;
  /* interval 2: half-width uses denominator n-1 */
  d=z*sqrt(p0*(1-p0)/(&amp;amp;n-1));
  pL2=p0-d;
  pU2=p0+d;
  output;
end;
drop p0 z d;
run;

/* Determine the range of true parameter p (times 10000) in which np&amp;gt;=5 and n(1-p)&amp;gt;=5 */

/* iL is the smallest integer i for which n*i/10000 is at least 5; iU mirrors it from 10000 */
%let iL=%sysfunc(ceil(50000/&amp;amp;n));
%let iU=%eval(10000-&amp;amp;iL);

/* Calculate coverage probabilities depending on p, incrementing p by 0.0001 */

data tmp;
set ci95;
do i=&amp;amp;iL to &amp;amp;iU;
  p=i/10000;
  /* each w is the binomial probability of outcome k when p lies inside that interval, otherwise 0 */
  w1=(pL1 &amp;lt;= p &amp;lt;= pU1)*pdf('binom',k,p,&amp;amp;n);
  w2=(pL2 &amp;lt;= p &amp;lt;= pU2)*pdf('binom',k,p,&amp;amp;n);
  w3=(pL3 &amp;lt;= p &amp;lt;= pU3)*pdf('binom',k,p,&amp;amp;n);
  output;
end;
drop i;
run;

/* Sum w1-w3 over all outcomes k within each value of p; the sums are written to covprob */
proc summary data=tmp nway;
class p;
var w1-w3;
output out=covprob sum=;
run;

/* Compute descriptive statistics and create plot for coverage probabilities */

proc means data=covprob mean std min q1 median q3 max;
var w1-w3;
run;

goptions reset=all;

symbol1 v=x    c=red    h=0.9;
symbol2 v=plus c=orange h=0.6;
symbol3 v=dot  c=green  h=0.3;
axis1 order=(0.85 to 1 by 0.005)
      label=(angle=90 'Coverage probability');

legend1 label=('CI') value=('Wald' 'Wald, but with denominator n-1' 'Wald with continuity correction')
        across=1 mode=protect position=(top right inside) shape=symbol(3, .9);

title "Coverage Probabilities of 95% Confidence Intervals for Binomial Parameter p when n=&amp;amp;n, np&amp;gt;=5 and n(1-p)&amp;gt;=5";

/* w1, w2, w3 are overlaid against p with a reference line at 0.95 */
proc gplot data=covprob;
plot w1*p=1 w2*p=2 w3*p=3 / overlay vaxis=axis1 vref=0.95 legend=legend1;
run;
quit;

/* clear the title set above */
title;

%mend binci;

%binci(17)  /* n= 17, p=0.2942, 0.2943, 0.2944, ..., 0.7057, 0.7058 */
%binci(25)  /* n= 25, p=0.2,    0.2001, 0.2002, ..., 0.7999, 0.8    */
%binci(50)  /* n= 50, p=0.1,    0.1001, 0.1002, ..., 0.8999, 0.9    */
%binci(100) /* n=100, p=0.05,   0.0501, 0.0502, ..., 0.9499, 0.95   */&lt;/CODE&gt;&lt;/PRE&gt;
&lt;/LI-SPOILER&gt;</description>
      <pubDate>Thu, 11 Feb 2016 23:15:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249567#M56530</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2016-02-11T23:15:23Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249627#M56540</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/32733"&gt;@FreelanceReinh﻿&lt;/a&gt;&amp;nbsp;and &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp﻿&lt;/a&gt;&amp;nbsp;Sorry about that n-1 in the denominator.&amp;nbsp; I suspect I copied and pasted that code from some earlier program that computed CIs for a different distribution.&amp;nbsp; I intended to use the&amp;nbsp;same formula that PROC FREQ uses. I will have to edit the blog post to correct that error.&amp;nbsp; I suggest that the OP use the (correct!) Wald formula.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;By the way, I wrote a blog post about &lt;A href="http://blogs.sas.com/content/iml/2012/03/14/the-normal-approximation-to-the-binomial-distribution-how-the-quantiles-compare.html" target="_self"&gt;the normal approximation to the binomial distribution&lt;/A&gt;, which is the basis for the CI formula.&amp;nbsp; The experts might enjoy reading it.&lt;/P&gt;</description>
      <pubDate>Fri, 12 Feb 2016 11:31:16 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249627#M56540</guid>
      <dc:creator>Rick_SAS</dc:creator>
      <dc:date>2016-02-12T11:31:16Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249657#M56543</link>
      <description>&lt;P&gt;thank you, everyone. it's good to know there are options.&lt;/P&gt;</description>
      <pubDate>Fri, 12 Feb 2016 13:40:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/249657#M56543</guid>
      <dc:creator>jlajla</dc:creator>
      <dc:date>2016-02-12T13:40:04Z</dc:date>
    </item>
    <item>
      <title>Re: Binomial Confidence Intervals without dataset</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/250003#M56565</link>
      <description>&lt;P&gt;After further thought, I no longer think there was an error in my original blog post.&amp;nbsp; My blog post was about estimating a p-value for a Monte Carlo simulation. As shown in this PROC FREQ documentation, &lt;A href="http://support.sas.com/documentation/cdl/en/procstat/68142/HTML/default/viewer.htm#procstat_freq_details96.htm" target="_self"&gt;the standard error for the Monte Carlo estimate of a binomial proportion&lt;/A&gt; has an N-1 in the denominator.&amp;nbsp; It hardly matters when you are doing 10,000 Monte Carlo simulations, but N-1 is the correct denominator.&amp;nbsp; (The OP for this problem should still use N, since that is correct for the SE for the usual estimator p_hat = k/N.)&lt;/P&gt;</description>
      <pubDate>Sun, 14 Feb 2016 21:54:00 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/Binomial-Confidence-Intervals-without-dataset/m-p/250003#M56565</guid>
      <dc:creator>Rick_SAS</dc:creator>
      <dc:date>2016-02-14T21:54:00Z</dc:date>
    </item>
  </channel>
</rss>

