<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Conditional interleaving of two datasets? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928779#M365457</link>
    <description>&lt;P&gt;If original data order is important, then you can avoid sorting:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set random_a (in=ina) random_b (in=inb);

  if _n_=1 then do;
    declare hash found_in_b (dataset:'random_b (keep=target)');
      found_in_b.definekey('target');
      found_in_b.definedone();
    declare hash found_in_a ();
      found_in_a.definekey('target');
      found_in_a.definedone();
  end;

  if ina=1 and found_in_a.check()^=0 then found_in_a.add();

  if (ina=1 and found_in_b.check()=0)
     or 
     (inb=1 and found_in_a.check()=0);
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 17 May 2024 01:23:48 GMT</pubDate>
    <dc:creator>mkeintz</dc:creator>
    <dc:date>2024-05-17T01:23:48Z</dc:date>
    <item>
      <title>Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928595#M365361</link>
      <description>&lt;P&gt;Esteemed Advisors:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am trying to interleave two datasets with a condition that the resulting dataset contains only observations that can be found in both of the two datasets. &lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Below is exemplar code to illustrate the problem.&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;If you run this code and inspect dataset interleave2 you will see that for a group of&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;3 observations where target=1, two came from Random_A and one came from Random_B.&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;Likewise, for a group of three observations where target=2, two came from Random_B and one came from Random_A.&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;All of these observations need to be retained in the desired dataset.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;For the group of 3 observations where target=3, all observations came from Random_B only.&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;These are ones that need to be omitted.&lt;SPAN class="Apple-converted-space"&gt;&amp;nbsp; &lt;/SPAN&gt;All observations for a given target that come from a single source dataset are not to be retained in the desired dataset.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The challenge for me (and now for you) is to come up with the code that will interleave Random_A and Random_B such that the resultant dataset that only contains the groups of targets that are present in both datasets.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Hope this makes sense and thanks for taking a look,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Gene&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Random_A (drop=i);
call streaminit(4786);
do i=1 to 100;
Source="A";
Target=rand("Integer",1,100);
ST=catx('/',Source,Target);
output;
end;

data Random_B (drop=i);
call streaminit(6874);
do i=1 to 150;
Source="B";
Target=rand("Integer",1,100);
ST=catx('/',Source,Target);
output;
end;

Proc sort data=Random_A;
by ST;
run;

Proc sort data=Random_B;
by ST;
run;

data interleave1;
set random_A random_B;
by ST;
run;

proc sort data=interleave1 out=interleave2 nounikey;
by target;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 May 2024 04:01:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928595#M365361</guid>
      <dc:creator>genemroz</dc:creator>
      <dc:date>2024-05-16T04:01:25Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928610#M365367</link>
      <description>&lt;P&gt;Below one way how this could work.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Interleaving data is slower than concatenating. If you need the result sorted then I'd be doing this after combining the tables. Depending on how many source rows get dropped this also leads to less rows in total that need sorting.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Random_A (drop=i);
  call streaminit(4786);
  do i=1 to 100;
    Source="A";
    Target=rand("Integer",1,100);
    ST=catx('/',Source,Target);
    output;
  end;

data Random_B (drop=i);
  call streaminit(6874);
  do i=1 to 150;
    Source="B";
    Target=rand("Integer",1,100);
    ST=catx('/',Source,Target);
    output;
  end;

proc sql;
  create view work.common_vals as
  select l.target
  from Random_A l inner join Random_B r
    on l.target=r.target
  ;
quit;

data inter;
  set random_A random_B;
  if _n_=1 then
    do;
      dcl hash h1(dataset:"work.common_vals");
      h1.defineKey('Target');
      h1.defineDone();
    end;
  if h1.check()=0;
run;

proc sort data=inter out=want;
  by st;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you have a bigger data volume with repeated values for target in both tables then you could use below SQL alternative to avoid a many:many join that creates a lot of rows.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create view work.common_vals as
  select l.target
  from (select distinct target from Random_A) l inner join Random_B r
    on l.target=r.target
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 May 2024 05:51:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928610#M365367</guid>
      <dc:creator>Patrick</dc:creator>
      <dc:date>2024-05-16T05:51:27Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928616#M365370</link>
      <description>&lt;P&gt;If I understood you correctly, for each value of Target you want observations from A and B only if that Target value exists in both.&lt;/P&gt;
&lt;P&gt;If so, then try this:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Proc sort data=Random_A equals;
by Target;
run;

Proc sort data=Random_B equals;
by Target;
run;


data interleave1;
  do _N_=1 by 1 until(last.Target);
    set random_A(in=ina) random_B(in=inb);
    by Target;
    N_A+ina;
    N_B+inb;
  end;

  do _N_=1 to _N_;
    set random_A random_B curobs=curobs1;
    by Target;
    if N_A and N_B then output;
  end; 

  call missing(N_A,N_B);
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Bart&lt;/P&gt;</description>
      <pubDate>Thu, 16 May 2024 06:30:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928616#M365370</guid>
      <dc:creator>yabwon</dc:creator>
      <dc:date>2024-05-16T06:30:54Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928641#M365380</link>
      <description>&lt;P&gt;A small tweak for the sake of efficiency:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;    set random_A(in=ina keep=target) random_B(in=inb keep=target);&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 May 2024 11:16:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928641#M365380</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2024-05-16T11:16:12Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928645#M365381</link>
      <description>&lt;P&gt;I think it won't help much since the other(second) DoW-loop takes all data in the end, and SAS will likely caches the data in memory to save some I/Os after the first loop. So even if the first DoW-loop "narrows" data, the second will have to do the "missing" I/O.&lt;/P&gt;</description>
      <pubDate>Thu, 16 May 2024 12:00:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928645#M365381</guid>
      <dc:creator>yabwon</dc:creator>
      <dc:date>2024-05-16T12:00:23Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928725#M365430</link>
      <description>Both solutions proposed by Yabwon and Patrick were successful.  I marked Yabwon's as accepted because I'm not familiar with hash code objects</description>
      <pubDate>Thu, 16 May 2024 19:07:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928725#M365430</guid>
      <dc:creator>genemroz</dc:creator>
      <dc:date>2024-05-16T19:07:45Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928778#M365456</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Random_A (drop=i);
call streaminit(4786);
do i=1 to 100;
Source="A";
Target=rand("Integer",1,100);
ST=catx('/',Source,Target);
output;
end;
run;
data Random_B (drop=i);
call streaminit(6874);
do i=1 to 150;
Source="B";
Target=rand("Integer",1,100);
ST=catx('/',Source,Target);
output;
end;
run;

data temp;
 set Random_A Random_B indsname=indsname;
 dsn=indsname;
run;
proc sql;
create table want as
select * from temp group by Target having count(distinct dsn)=2;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 May 2024 01:05:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928778#M365456</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2024-05-17T01:05:37Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928779#M365457</link>
      <description>&lt;P&gt;If original data order is important, then you can avoid sorting:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set random_a (in=ina) random_b (in=inb);

  if _n_=1 then do;
    declare hash found_in_b (dataset:'random_b (keep=target)');
      found_in_b.definekey('target');
      found_in_b.definedone();
    declare hash found_in_a ();
      found_in_a.definekey('target');
      found_in_a.definedone();
  end;

  if ina=1 and found_in_a.check()^=0 then found_in_a.add();

  if (ina=1 and found_in_b.check()=0)
     or 
     (inb=1 and found_in_a.check()=0);
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 17 May 2024 01:23:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928779#M365457</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2024-05-17T01:23:48Z</dc:date>
    </item>
    <item>
      <title>Re: Conditional interleaving of two datasets?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928783#M365459</link>
      <description>&lt;P&gt;A minor simplification of&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/35763"&gt;@yabwon&lt;/a&gt;&amp;nbsp;solution:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data=Random_A equals;
  by target;
run;

proc sort data=Random_B equals;
  by target;
run;

data want;
  merge random_a (in=ina) random_b (in=inb) ;
  by target;

  if last.target=1 then do until (last.target);
    set random_a  random_b;
    by target;
    if ina=1 and inb=1 then output;
  end;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 17 May 2024 01:46:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Conditional-interleaving-of-two-datasets/m-p/928783#M365459</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2024-05-17T01:46:32Z</dc:date>
    </item>
  </channel>
</rss>

