<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Simulating clustered data with known correlation / Mixture of Normal Distributions in Statistical Procedures</title>
    <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/395448#M20647</link>
    <description>&lt;P&gt;You need to specify the mean vectors and covariance for each cluster, as well as the relative proportion of observations to draw from each cluster. See &lt;A href="http://blogs.sas.com/content/iml/2017/09/13/simulate-clusters-multivariate-sas.html" target="_self"&gt;"Simulate multivariate&amp;nbsp;clusters in SAS"&lt;/A&gt; for an explanation, example, and SAS/IML code.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 13 Sep 2017 10:04:58 GMT</pubDate>
    <dc:creator>Rick_SAS</dc:creator>
    <dc:date>2017-09-13T10:04:58Z</dc:date>
    <item>
      <title>Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394502#M20598</link>
      <description>&lt;P&gt;dear sas community,&lt;BR /&gt;currently i am seeking to show the&amp;nbsp;&lt;SPAN&gt;adverse effects of cluster sampling when the data in respective clusters is correlated.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;for this i want to generate a population with 500 clusters (let it be city districts) where each cluster has 1000 correlated observations (let it be income of inhabitant).&lt;BR /&gt;From this population i want to draw samples (simple random, cluster sample) in a second step and compare their characteristics.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;SPAN&gt;i am struggling to create correlated data in the clusters. &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;so far i played around with the rand function to create clusters and data points (code attached).&lt;BR /&gt;&lt;BR /&gt;any help on how to pre-define rho within the clusters would be very much appreciated.&amp;nbsp;&lt;BR /&gt;hope this question is not too basic. thanks in advance!&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;/* Step 1: Generate a data set that contains 500 clusters with each having 1000 inhabitants */&lt;/P&gt;&lt;P&gt;%let N = 1000; /* sample size */&lt;/P&gt;&lt;P&gt;%let NumSamples = 500; /* number of samples */&lt;/P&gt;&lt;P&gt;data LOR;&lt;/P&gt;&lt;P&gt;call streaminit(123);&lt;/P&gt;&lt;P&gt;do SampleID = 1 to &amp;amp;NumSamples; /* ID variable for each LOR */&lt;/P&gt;&lt;P&gt;do IND = 1 to &amp;amp;N;&lt;/P&gt;&lt;P&gt;tetha = 1000+SampleID*10; /* Average Income */&lt;/P&gt;&lt;P&gt;Lampda = 100; /* Std. Dev in Cluster */&lt;/P&gt;&lt;P&gt;INCOME_SPE = rand("Normal",tetha,lampda);&lt;/P&gt;&lt;P&gt;output;&lt;/P&gt;&lt;P&gt;end;&lt;/P&gt;&lt;P&gt;end;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 10 Sep 2017 12:27:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394502#M20598</guid>
      <dc:creator>jrock</dc:creator>
      <dc:date>2017-09-10T12:27:26Z</dc:date>
    </item>
    <item>
      <title>Re: Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394536#M20603</link>
      <description>&lt;P&gt;Correlation is between two variables. Are you referring to correlation or intra class correlation (ICC)?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I'm not familiar with correlation within a single variable, how are you defining that or calculating it?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;PS. This may very well be due to inexperience on my part rather than anything incorrect you're doing or saying.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 10 Sep 2017 19:25:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394536#M20603</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2017-09-10T19:25:25Z</dc:date>
    </item>
    <item>
      <title>Re: Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394567#M20606</link>
      <description>&lt;P&gt;I guess you could start with this:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Step 1: Generate a data set that contains 500 clusters with each having 1000 inhabitants */
%let N = 1000; /* Cluster size: number of observations generated per cluster */
%let NumSamples = 500; /* Number of clusters (upper bound of the SampleID loop) */
%let Rho = 0.1; /* Intracluster correlation coefficient */
%let Std = 100; /* Std Dev within clusters */
%let Income = 1000; /* Average income */

/* Simulate a two-level population: draw one mean per cluster, then draw
   N incomes around that cluster mean. */
data LOR;
/* Fixed seed for the RAND calls below */
call streaminit(123);
/* Between-cluster std dev chosen so that
   betweenVar / (betweenVar + withinVar) equals Rho, that is
   betweenVar = withinVar * Rho / (1 - Rho), with withinVar = Std squared. */
betweenStd = sqrt( (&amp;amp;Std)**2 * &amp;amp;Rho / (1-&amp;amp;Rho) );
do SampleID = 1 to &amp;amp;NumSamples; /* ID variable for each LOR */
    /* Cluster-level mean, drawn once per cluster around the overall average */
    clusterIncome = rand("NORMAL", &amp;amp;Income, betweenStd);     
    do IND = 1 to &amp;amp;N;
        /* Individual income, drawn around the mean of its own cluster */
        INCOME_SPE = rand("Normal", clusterIncome, &amp;amp;Std);
        output;
        end;
    end;
run;

/* Estimate variance components */
/* The ODS Estimates table is saved as data set varEst for the next step. */
proc varcomp data=lor;
class sampleId;
model income_spe = sampleId;
ods output Estimates=varEst;
run;

/* Estimate intracluster correlation coefficient */
data rhoEstimate;
/* Each SET reads from varEst only the rows matching its WHERE filter and
   renames the estimate column, so the between-cluster and within-cluster
   variances land in the same observation. */
set varEst(where=(VarComp="Var(SampleID)") rename=income_spe=betweenVar);
set varEst(where=(VarComp="Var(Error)") rename=income_spe=withinVar);
/* Share of the total variance that is due to differences between clusters */
rhoEst = betweenVar / (betweenVar + withinVar);
label  
    betweenVar="Variance between clusters"
    withinVar="Variance within clusters"
    rhoEst = "Intracluster correlation estimate";
drop varcomp;
run;

/* Print the estimates using the labels defined above, without the Obs column */
proc print data=rhoEstimate label noobs; run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;                        Variance      Variance    Intracluster
                         between        within     correlation
                        clusters      clusters      estimate

                          1080.5        9982.8      0.097663
&lt;/PRE&gt;</description>
      <pubDate>Mon, 11 Sep 2017 04:46:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/394567#M20606</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2017-09-11T04:46:09Z</dc:date>
    </item>
    <item>
      <title>Re: Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/395448#M20647</link>
      <description>&lt;P&gt;You need to specify the mean vectors and covariance for each cluster, as well as the relative proportion of observations to draw from each cluster. See &lt;A href="http://blogs.sas.com/content/iml/2017/09/13/simulate-clusters-multivariate-sas.html" target="_self"&gt;"Simulate multivariate&amp;nbsp;clusters in SAS"&lt;/A&gt; for an explanation, example, and SAS/IML code.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 13 Sep 2017 10:04:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/395448#M20647</guid>
      <dc:creator>Rick_SAS</dc:creator>
      <dc:date>2017-09-13T10:04:58Z</dc:date>
    </item>
    <item>
      <title>Re: Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/396525#M20683</link>
      <description>&lt;P&gt;Thank you so much indeed, this is very helpful for the univariate case indeed!!&amp;nbsp;&lt;BR /&gt;Sorry for the late reaction but this really is a very nice piece to understand the logic of the ICC..&lt;BR /&gt;Great work.&lt;/P&gt;</description>
      <pubDate>Sat, 16 Sep 2017 01:23:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/396525#M20683</guid>
      <dc:creator>jrock</dc:creator>
      <dc:date>2017-09-16T01:23:33Z</dc:date>
    </item>
    <item>
      <title>Re: Simulating clustered data with known correlation / Mixture of Normal Distributions</title>
      <link>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/396526#M20684</link>
      <description>&lt;P&gt;Thanks also to you Rick for your kind help. I am just about to get into the multivariate simulation.. Very&amp;nbsp;good blog entry for this&lt;/P&gt;</description>
      <pubDate>Sat, 16 Sep 2017 01:26:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/Statistical-Procedures/Simulating-clustered-data-with-known-correlation-Mixture-of/m-p/396526#M20684</guid>
      <dc:creator>jrock</dc:creator>
      <dc:date>2017-09-16T01:26:54Z</dc:date>
    </item>
  </channel>
</rss>

