<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Method to rerun code until a condition is met in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521574#M141535</link>
    <description>I should clarify...I don't want BOTH records with duplicate IDs. One of the records in the append_set is fine.</description>
    <pubDate>Fri, 14 Dec 2018 17:36:18 GMT</pubDate>
    <dc:creator>tpt1</dc:creator>
    <dc:date>2018-12-14T17:36:18Z</dc:date>
    <item>
      <title>Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521572#M141534</link>
      <description>&lt;P&gt;Hi-&lt;/P&gt;&lt;P&gt;I have&amp;nbsp;3 data sets that are created using&amp;nbsp;PROC SURVEYSELECT, and that are then appended using a data step.&amp;nbsp; The&amp;nbsp;3 datasets have the potential to contain duplicate&amp;nbsp;IDs between them.&amp;nbsp; The fact that there is duplication&amp;nbsp;across the&amp;nbsp;3 datasets is&amp;nbsp;expected and is not an issue, though I do not want records with duplicate&amp;nbsp;IDs&amp;nbsp;in my appended file.&amp;nbsp; The appended file can have no additional records that the end user could substitute in case a duplicate record is found.&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My first thought is to use DO UNTIL that could rerun the&amp;nbsp;3 PROC SURVEYSELECT statements until the appended file has no duplication, but this seems cumbersome.&amp;nbsp; Can anyone provide an efficient way to append multiple datasets and guarantee no duplication?&amp;nbsp; I included an abridged version of my code below:&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;surveyselect&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;data&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_1 &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;sampsize&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="2"&gt;30&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;out&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_1_sample;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;RUN&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier 
New" size="2"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;surveyselect&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;data&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_2 &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;sampsize&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="2"&gt;27&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;out&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_2_sample;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;RUN&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;surveyselect&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;data&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_3 &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;sampsize&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=&lt;/FONT&gt;&lt;STRONG&gt;&lt;FONT color="#008080" face="Courier New" size="2"&gt;45&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;out&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=set_3_sample;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;RUN&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;data&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; 
append_set;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;set&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; set_1_sample set_2_sample set_3_sample;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="Courier New" size="2"&gt;run;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="Courier New" size="2"&gt;&lt;FONT face="Courier New" size="2"&gt;quit;&lt;/FONT&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;sort&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;data&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;=append_set ;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;by&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; ID;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;run&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;/* check for duplicates */&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;data&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; dups;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;set&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; append_set; &lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;by&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; ID; &lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;if&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; first.ID and last.ID &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;then&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; 
;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;else&lt;/FONT&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;output&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;run&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;/* identify the count of duplicates */&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;proc&lt;/STRONG&gt;&lt;/FONT&gt; &lt;STRONG&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;sql&lt;/FONT&gt;&lt;/STRONG&gt; &lt;FONT color="#0000ff" face="Courier New" size="2"&gt;noprint&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;select&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; count(*) &lt;/FONT&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;into&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; :dup_count&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;from&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; dups;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#000080" face="Courier New" size="2"&gt;&lt;STRONG&gt;quit&lt;/STRONG&gt;&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt;;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" face="Courier New" size="2"&gt;%put&lt;/FONT&gt;&lt;FONT face="Courier New" size="2"&gt; &amp;amp;dup_count;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="Courier New" size="2"&gt;&amp;nbsp;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="Courier New" size="2"&gt;/* add DO UNTIL which would run this code until the condition "dup_count=0" has been met */&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;DO 
UNTIL(&amp;amp;dup_count=0);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Dec 2018 17:34:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521572#M141534</guid>
      <dc:creator>tpt1</dc:creator>
      <dc:date>2018-12-14T17:34:18Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521574#M141535</link>
      <description>I should clarify...I don't want BOTH records with duplicate IDs. One of the records in the append_set is fine.</description>
      <pubDate>Fri, 14 Dec 2018 17:36:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521574#M141535</guid>
      <dc:creator>tpt1</dc:creator>
      <dc:date>2018-12-14T17:36:18Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521584#M141539</link>
      <description>&lt;P&gt;try this&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Collect the surplus copies of each ID. append_set must be sorted by ID. */
data dups;
set append_set;
by ID;
/* The first record of each BY group is the one to keep, so every later
   record in the group is a duplicate. Testing "not first.ID and not last.ID"
   would skip the second record of a pair, because it is last.ID. */
if not first.ID then output;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 14 Dec 2018 17:52:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521584#M141539</guid>
      <dc:creator>VDD</dc:creator>
      <dc:date>2018-12-14T17:52:39Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521612#M141546</link>
      <description>&lt;P&gt;Thanks VDD, but I still have duplicate IDs in my append_set.&amp;nbsp; I don't see another way to remove them without reducing the required population size (which in this example is 102).&lt;/P&gt;</description>
      <pubDate>Fri, 14 Dec 2018 19:17:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521612#M141546</guid>
      <dc:creator>tpt1</dc:creator>
      <dc:date>2018-12-14T19:17:09Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521617#M141548</link>
      <description>&lt;P&gt;I am a little concerned that the approach you have been following is going to actually yield incorrect selection probabilities and weights.&lt;/P&gt;
&lt;P&gt;Consider if set_2 has 100 records and I select 10 with SRS. The probability of selection is .1 and weight would be 10.&lt;/P&gt;
&lt;P&gt;But if 2 of them are duplicates and you remove them then the remaining records still have a weight of 10 but should actually be 12.5 (100 / 8) if they are to represent the population of set_2.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;And worse by compounding with the third set.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Perhaps you can describe the populations that set_1, set_2 and set_3 represent and why there are duplicates?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I might consider scrubbing the source data sets to identify the duplicate persons and randomly remove them from the other sets so the person only appears once across the three source sets.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Something like this:&lt;/P&gt;
&lt;PRE&gt;/* Example data: each step keeps an id from 1 to 1000 when a uniform
   random draw is below .3, so the same id can land in more than one set. */
data work.set1;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;
data work.set2;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;
data work.set3;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;

/* Stack the three sets; INDSNAME= puts the name of the contributing
   data set in dsn, which is copied to dataname so it is kept. */
data work.all;
   length dataname $30.;
   set work.set1 work.set2 work.set3 indsname=dsn;
   dataname = dsn;
run;

/* BY-group processing in the next step needs the rows ordered by id. */
proc sort data=work.all;
   by id;
run;

/* One row per id, with the source data set names spread across col1, col2, ... */
proc transpose data=work.all out=work.trans;
   by id;
   var dataname;
run;

/* Assign every id to exactly one source set. */
data work.choose;
   set work.trans;
   array c col: ;
   /* join the non-blank names so countw gives the number of sets holding this id */
   str= catx('|',of c(*));
   if countw(str,'|')=2 then do;
      /* id is in two sets: pick either with probability .5 */
      set = c[rand('table',.5,.5)];
   end;
   else if countw(str,'|')=3 then do;
      /* id is in all three sets: pick one with probability 1/3 each */
      set = c[rand('table',1/3,1/3,1/3)];
   end;
   else set=col1;
   keep id set;
run;

/* STRATA requires the input sorted by the stratum variable. */
proc sort data=work.choose;
   by set;
run;

/* Stratified selection: one sample size per value of set. */
proc surveyselect data=work.choose
   sampsize=(30 27 45)
   out = work.selected
;
   strata set;
run;


&lt;/PRE&gt;
&lt;P&gt;You would have to merge the result of just working with the ids back to original data for other variables but that should not be difficult.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Dec 2018 19:37:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521617#M141548</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2018-12-14T19:37:43Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521638#M141550</link>
      <description>&lt;P&gt;not sure if I understand all your requirements, and note that I did NOT actually run the attached code, but I do stuff like this all the time; maybe what you are looking for is accomplished like so:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/*
 assuming that ONE and only one observation from a set of duplicates
 (those having same ID value) are to be kept in the 'singles' dataset;
 observations for which there is one and only ID value are to be kept in the singles dataset;
 and that only records having more than one observation within a single ID value are to be kept in the multis dataset.
*/
/* Stack the three sample data sets into one data set. */
data append_set;

set set_1_sample set_2_sample set_3_sample;
run;

/*
 if there is some variable that might be used to
 prioritize observations within a single ID,
 add those to the order by clause here;
 this would cause the most desirable observations to
 come out first.
*/

/* Define a view that returns the rows of append_set ordered by id. */
proc sql noprint ;
	create view append_set_v as
	select *
	from append_set
	order by id
;
quit ;

/*
 dataset singles will have the first observation encountered within 
 a set of duplicates within ID, along with all true singles;
 dataset multis will have only dupe-sets of GT 1 observation.

 at end, inside the multis dataset, the max seq value within ID will contain the count of records having that ID.
*/

/* Split the id-ordered rows into singles (one row per ID) and multis
   (every row of an ID that occurs more than once). */
data singles multis ;
retain seq 0 ;
	/* read the view defined by the PROC SQL step, which is named append_set_v */
	set append_set_v ;
	by ID ;
	/* the first row of each ID restarts the counter and is the row kept in singles */
	if first.ID then do ;
		seq=1 ;
		output singles ;
	end ;
	/* a row that is not both first and last belongs to an ID with several rows */
	if not ( first.ID AND last.ID ) then do ;
		output multis ;
		seq+1 ;
	end ;
run ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 14 Dec 2018 20:12:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521638#M141550</guid>
      <dc:creator>johnsville</dc:creator>
      <dc:date>2018-12-14T20:12:30Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521706#M141570</link>
      <description>&lt;P&gt;I think it would be best to consider all the data at once, eliminate duplicate ids at random and select your sample from that. Here is a simple way to do this, building on code provided by &lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Example data: each step keeps an id from 1 to 1000 when a uniform
   random draw is below .3, so the same id can land in more than one set. */
data work.set1;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;
data work.set2;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;
data work.set3;
   do id = 1 to 1000;
      if rand('uniform') &amp;lt; .3 then output;
   end;
run;

/* Stack the three sets, tag each row with its source data set name
   (second word of dsn) and attach a random number used as a tie-breaker. */
data all;
   length dataname $8.;
   set set1 set2 set3 indsname=dsn;
   dataname = scan(dsn,2);
   rnd = rand("uniform");
run;

/* Within each id the rows end up in random order because rnd is the second key. */
proc sort data=all; by id rnd; run;

/* Keep only the first row of each id, i.e. one randomly chosen copy. */
data unique;
set all; by id;
if first.id;
drop rnd;
run;

/* STRATA in the sampling step needs the data sorted by dataname. */
proc sort data=unique; by dataname; run;

/* Draw the stratified sample from the de-duplicated data: one stratum per
   source data set, with sampsize giving one sample size per stratum.
   Ordinary spaces are used in the statement; non-breaking spaces copied
   into SAS code are not treated as blanks. */
proc surveyselect
    data = unique
    sampsize=(30 27 45)
    out = samples;
strata dataname;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sat, 15 Dec 2018 06:06:22 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/521706#M141570</guid>
      <dc:creator>PGStats</dc:creator>
      <dc:date>2018-12-15T06:06:22Z</dc:date>
    </item>
    <item>
      <title>Re: Method to rerun code until a condition is met</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/522198#M141736</link>
      <description>&lt;P&gt;PG Stats-&lt;/P&gt;&lt;P&gt;Yes, removing duplicates at random makes good sense.&amp;nbsp; Thanks so much to everyone for the great feedback!&lt;/P&gt;</description>
      <pubDate>Tue, 18 Dec 2018 13:20:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Method-to-rerun-code-until-a-condition-is-met/m-p/522198#M141736</guid>
      <dc:creator>tpt1</dc:creator>
      <dc:date>2018-12-18T13:20:05Z</dc:date>
    </item>
  </channel>
</rss>

