<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Stratified Sampling Macro in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338757#M77208</link>
    <description>&lt;P&gt;Or it could be done more simply.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data control;
infile cards expandtabs truncover;
input StartGroup	EndGroup	Percent;
cards;
1	1	25
1	2	25
1	3	25
1	4	25
5	5	50
5	6	25
5	7	25
;
run;

data control;
 set control;
 by startgroup;
 if first.startgroup then cum=0;
 cum+percent;
run;
data key;
 set control;
  by startgroup;
  lag=lag(cum);
  if first.startgroup then start=1;
   else start=lag+1;
   
  do i=start to cum;
   output;
  end;
  drop lag start cum percent;
run;



/*Input dataset*/
data have;
 do group=1,5;
  do x=1 to 200;
   output;
  end;
 end;
run;

proc surveyselect data=have out=temp groups=100;
strata group;
run;
proc sort data=temp;
by group groupid;
run;

data want;
 merge temp key(rename=(startgroup=group i=groupid));
 by group groupid;
run;

proc print;run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 07 Mar 2017 11:18:36 GMT</pubDate>
    <dc:creator>Ksharp</dc:creator>
    <dc:date>2017-03-07T11:18:36Z</dc:date>
    <item>
      <title>Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338518#M77100</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please can you help me with the following programming task.&amp;nbsp; I have an individual-level (1 row per person) dataset that contains a Group ID variable.&amp;nbsp; Depending on the value of the Group ID, I would like to split the group using stratified random sampling (stratification variable: Score_Band).&amp;nbsp; The groups that require splitting and the manner of the split are provided in a 'control' dataset, as follows:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;StartGroup&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;EndGroup&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Percent&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;50&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;25&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In the above example table, the subset of individuals belonging to group 1 are to be split across 4 groups (labelled 1 - 4), each containing 25% of the volume.&amp;nbsp; Individuals belonging to group 5 are to be split into three groups (5 - 7), containing 50%, 25% and 25% of the volume, respectively.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to write some code that will take an input dataset (1 row per individual) that contains an initial GroupID variable, and maps it to a new GroupID variable, based on what is specified 
in the 'control' table.&amp;nbsp; Each split is to be based on stratified random sampling (variable Score_Band).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any help would be greatly appreciated,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hoa&lt;/P&gt;</description>
      <pubDate>Mon, 06 Mar 2017 18:17:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338518#M77100</guid>
      <dc:creator>HoaTruong</dc:creator>
      <dc:date>2017-03-06T18:17:59Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338668#M77171</link>
      <description>&lt;P&gt;Is this what you are looking for?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
infile cards expandtabs truncover;
input StartGroup	EndGroup	Percent;
cards;
1	1	.25
1	2	.25
1	3	.25
1	4	.25
5	5	.50
5	6	.25
5	7	.25
;
run;

proc surveyselect data=have out=want sampsize=100 outhits method=pps_wr;
strata startgroup;
size percent;
run;

proc print ;run;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 07 Mar 2017 03:52:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338668#M77171</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-07T03:52:33Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338672#M77173</link>
      <description>&lt;P&gt;I agree with the approach that&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/18408"&gt;@Ksharp&lt;/a&gt;&amp;nbsp;suggested but, based on your requirements, think you might want to use a different method. I'd suggest:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;proc surveyselect data=have out=want sampsize=100 outhits method=pps_sys;
  strata startgroup;
  size percent;
run;
&lt;/PRE&gt;
&lt;P&gt;Art, CEO, AnalystFinder.com&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Mar 2017 04:15:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338672#M77173</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2017-03-07T04:15:37Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338727#M77195</link>
      <description>&lt;P&gt;Thank you both for your quick and helpful replies - I really appreciate it!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Apologies if I'm misunderstanding, but I'm wondering how I can apply the sampling logic you've provided&amp;nbsp;to my input dataset.&amp;nbsp; In addition to the 'control' table below...&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;TABLE&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;&lt;STRONG&gt;Group&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;EndGroup&lt;/STRONG&gt;&lt;/TD&gt;&lt;TD&gt;&lt;STRONG&gt;Percent&lt;/STRONG&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;2&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;3&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;1&lt;/TD&gt;&lt;TD&gt;4&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;0.5&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;6&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;5&lt;/TD&gt;&lt;TD&gt;7&lt;/TD&gt;&lt;TD&gt;0.25&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I also have an input dataset (please see the attached file).&amp;nbsp; The input dataset contains one row per individual.&amp;nbsp; Based on the control table above, I would like to split the existing groups into sub-groups, using the stratified sampling procedure.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;There are 42 individuals belonging to group 1 in the input table.&amp;nbsp; I would like these to be split into 4 groups:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;1 - containing roughly 25% of the original group 1&lt;/P&gt;&lt;P&gt;2 - containing roughly 25% of the original group 1&lt;/P&gt;&lt;P&gt;3 - containing roughly 25% of the original group 1&lt;/P&gt;&lt;P&gt;4 - containing 
roughly 25% of the original group 1&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Similarly, there are 234 individuals belonging to group 5 in the input table.&amp;nbsp; I would like these split into 3 groups:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;5 - containing roughly 50% of the original group 5&lt;/P&gt;&lt;P&gt;6 - containing roughly 25% of the original group 5&lt;/P&gt;&lt;P&gt;7 - containing roughly 25% of the original group 5&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;On the input dataset, I have a variable called Score_Band (with possible values 'A', 'B' and 'C').&amp;nbsp; When I'm splitting the dataset, I would like the sampling to be stratified using Score_Band.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm aiming to have an output dataset that is the same as the input dataset, but with a new group variable added - based on the new splits.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks again for your help - I really appreciate it!&lt;/P&gt;</description>
      <pubDate>Tue, 07 Mar 2017 09:15:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338727#M77195</guid>
      <dc:creator>HoaTruong</dc:creator>
      <dc:date>2017-03-07T09:15:30Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338749#M77205</link>
      <description>&lt;P&gt;OK. How about this one?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data control;
infile cards expandtabs truncover;
input StartGroup	EndGroup	Percent;
cards;
1	1	25
1	2	25
1	3	25
1	4	25
5	5	50
5	6	25
5	7	25
;
run;

data control;
 set control;
 by startgroup;
 if first.startgroup then cum=0;
 cum+percent;
run;
data key;
 set control;
  by startgroup;
  lag=lag(cum);
  if first.startgroup then start=1;
   else start=lag+1;
   
  do i=start to cum;
   output;
  end;
  drop lag start cum percent;
run;



/*Input dataset*/
data have;
 do group=1,5;
  do x=1 to 200;
   output;
  end;
 end;
run;
data have;
 set have;
 call streaminit(123456789);
 random=rand('uniform');
run;


proc rank data=have out=temp groups=100 ;
by group;
var random;
ranks r;
run;
proc sort data=temp ;
by group r;
run;
data temp;
 set temp;
 i=r+1;
 drop random r;
run;

data want;
 merge temp key(rename=(startgroup=group));
 by group i;
run;

proc print;run;

&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 07 Mar 2017 10:50:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338749#M77205</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-07T10:50:09Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338757#M77208</link>
      <description>&lt;P&gt;Or it could be done more simply.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data control;
infile cards expandtabs truncover;
input StartGroup	EndGroup	Percent;
cards;
1	1	25
1	2	25
1	3	25
1	4	25
5	5	50
5	6	25
5	7	25
;
run;

data control;
 set control;
 by startgroup;
 if first.startgroup then cum=0;
 cum+percent;
run;
data key;
 set control;
  by startgroup;
  lag=lag(cum);
  if first.startgroup then start=1;
   else start=lag+1;
   
  do i=start to cum;
   output;
  end;
  drop lag start cum percent;
run;



/*Input dataset*/
data have;
 do group=1,5;
  do x=1 to 200;
   output;
  end;
 end;
run;

proc surveyselect data=have out=temp groups=100;
strata group;
run;
proc sort data=temp;
by group groupid;
run;

data want;
 merge temp key(rename=(startgroup=group i=groupid));
 by group groupid;
run;

proc print;run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 07 Mar 2017 11:18:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338757#M77208</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-07T11:18:36Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338761#M77211</link>
      <description>&lt;P&gt;Thank you very much for the quick response!&amp;nbsp; I'll give it a try and will report back! &lt;img id="smileyhappy" class="emoticon emoticon-smileyhappy" src="https://communities.sas.com/i/smilies/16x16_smiley-happy.png" alt="Smiley Happy" title="Smiley Happy" /&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Mar 2017 11:31:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338761#M77211</guid>
      <dc:creator>HoaTruong</dc:creator>
      <dc:date>2017-03-07T11:31:05Z</dc:date>
    </item>
    <item>
      <title>Re: Stratified Sampling Macro</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338804#M77230</link>
      <description>Thank you very much - this is just what I needed!</description>
      <pubDate>Tue, 07 Mar 2017 14:15:15 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Stratified-Sampling-Macro/m-p/338804#M77230</guid>
      <dc:creator>HoaTruong</dc:creator>
      <dc:date>2017-03-07T14:15:15Z</dc:date>
    </item>
  </channel>
</rss>

