<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic set and merge in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954438#M372747</link>
    <description>&lt;P&gt;Hello&lt;/P&gt;
&lt;P&gt;Let's say that I have&amp;nbsp; a data set with the customers in&amp;nbsp; desired population (100,000 customers).&lt;/P&gt;
&lt;P&gt;Let's say that there are 12 data sets (Each data set represent month .Name of data set in structure YYMM).&lt;/P&gt;
&lt;P&gt;Note that each data set from the 12 data sets have 2 million rows.&lt;/P&gt;
&lt;P&gt;I want to set 12 data sets but only forthe customers in my population.&lt;/P&gt;
&lt;P&gt;I run this code but I want to ask if can have better code run more quickly&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data  LongFile_t ;
SET  PCS_2211_Y
     PCS_2210_Y
	 PCS_2209_Y
	 PCS_2208_Y
	 PCS_2207_Y
	 PCS_2206_Y
	 PCS_2205_Y
	 PCS_2204_Y
	 PCS_2203_Y
	 PCS_2202_Y
	 PCS_2201_Y
	 PCS_2112_Y;
Run;

proc sort data=MyPop_tbl(Keep=CustID);
by CustID;
Run;

Data want;
Merge MyPop_tbl(in=a KEEP=CustID)  LongFile_t(in=b);
by CustID;
If a;
Run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 22 Dec 2024 10:49:51 GMT</pubDate>
    <dc:creator>Ronein</dc:creator>
    <dc:date>2024-12-22T10:49:51Z</dc:date>
    <item>
      <title>set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954438#M372747</link>
      <description>&lt;P&gt;Hello&lt;/P&gt;
&lt;P&gt;Let's say that I have&amp;nbsp; a data set with the customers in&amp;nbsp; desired population (100,000 customers).&lt;/P&gt;
&lt;P&gt;Let's say that there are 12 data sets (Each data set represent month .Name of data set in structure YYMM).&lt;/P&gt;
&lt;P&gt;Note that each data set from the 12 data sets have 2 million rows.&lt;/P&gt;
&lt;P&gt;I want to set 12 data sets but only forthe customers in my population.&lt;/P&gt;
&lt;P&gt;I run this code but I want to ask if can have better code run more quickly&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data  LongFile_t ;
SET  PCS_2211_Y
     PCS_2210_Y
	 PCS_2209_Y
	 PCS_2208_Y
	 PCS_2207_Y
	 PCS_2206_Y
	 PCS_2205_Y
	 PCS_2204_Y
	 PCS_2203_Y
	 PCS_2202_Y
	 PCS_2201_Y
	 PCS_2112_Y;
Run;

proc sort data=MyPop_tbl(Keep=CustID);
by CustID;
Run;

Data want;
Merge MyPop_tbl(in=a KEEP=CustID)  LongFile_t(in=b);
by CustID;
If a;
Run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 22 Dec 2024 10:49:51 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954438#M372747</guid>
      <dc:creator>Ronein</dc:creator>
      <dc:date>2024-12-22T10:49:51Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954439#M372748</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Maybe this is quicker ?? Maybe not. */
/* Just test to find out!              */

/* Below code was typed 'blind' ... without any testing on concrete data */

%LET WhereClause=%str(where CustID IN (select CustID from MyPop_tbl(Keep=CustID)));
%PUT &amp;amp;=WhereClause;

/* OUTER UNION CORR in PROC SQL                */
/* is equivalent to SET statement in data step */
proc sql;
 create table want as
  select * from PCS_2211_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2210_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2209_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2208_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2207_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2206_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2205_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2204_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2203_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2202_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2201_Y &amp;amp;WhereClause.  OUTER UNION CORR
  select * from PCS_2112_Y &amp;amp;WhereClause. ;
QUIT;
/* end of program */&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Koen&lt;/P&gt;</description>
      <pubDate>Sun, 22 Dec 2024 14:30:07 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954439#M372748</guid>
      <dc:creator>sbxkoenk</dc:creator>
      <dc:date>2024-12-22T14:30:07Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954441#M372749</link>
      <description>&lt;P&gt;I am missing something in your code: you should either sort the LongFile_t or you should build an index while creating it. I replicated your program in a somewhat simplified version: The monthly files are created by following step:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data one; * repeat with data sets two - twelve;
length ID $1;
do n=1 to 2000000;
   x = 100*uniform(0);
   a = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
   ID = substr(a,int(uniform(0)*26+1),1);
   output;
end;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Then I glue them together either by sorting (note that I use a data step VIEW to glue the files virtually together. That saves writing out the long file.):&lt;/FONT&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data long/view=long;
set one two three four five six seven eight nine ten eleven twelve;
run;
proc sort data=long out=long2;
by ID;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;or the way you did it, but creating an index on ID on the fly:&lt;/FONT&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data long(index=(ID));
set one two three four five six seven eight nine ten eleven twelve;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Via both paths you can do the final merge.&lt;BR /&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;I selected 4 ID's to get from the big file as my target population (selecting some 3.7 million obs). These were my running times for the various steps:&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Create the long file with the index: real time 5.80 seconds, cpu time 3.57 seconds&lt;BR /&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Merge based on the indexed file: real time 10.04 seconds, cpu time 8.87 seconds&lt;BR /&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Create the long file via data step VIEW: real time 0.01 seconds, cpu time 0.00 seconds&lt;BR /&gt;Sort the VIEW: real time 5.95 seconds, cpu time 2.15 seconds&lt;BR /&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Merge based on the sorted long file: real time 1.33 seconds, cpu time 0.34 seconds&lt;BR /&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;The alternative approach would be to sort the separate PCS files and filter your group at that level and glue the resulting files together.&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;Sorting one file of 2,000,000 observations took 0.33 seconds RT and 0.11 seconds CPU, creating the selection took 0.13 seconds RT and 0.10 seconds CPU. Multiplied by 12 and add a step to glue everything together you are possibly still better of.&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;I hope this helps.&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 22 Dec 2024 15:10:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954441#M372749</guid>
      <dc:creator>ErikT_NL</dc:creator>
      <dc:date>2024-12-22T15:10:04Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954454#M372757</link>
      <description>&lt;P&gt;You did not provide some important information.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Are the existing datasets already sorted by CUSTID?&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;If so you can do it all in one data step:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set MyPop_tbl(in=in1 KEEP=CustID) 
      PCS_2211_Y
      PCS_2210_Y
      PCS_2209_Y
      PCS_2208_Y
      PCS_2207_Y
      PCS_2206_Y
      PCS_2205_Y
      PCS_2204_Y
      PCS_2203_Y
      PCS_2202_Y
      PCS_2201_Y
      PCS_2112_Y
  ;
  by custid;
  if first.custid then want=0;
  want+in1;
  if want and not in1 then output;
  drop want;
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;How large is the set of CUSTID's that you want to keep?&amp;nbsp; Is it small enough to put into a macro variable? I macro variable can hold 64K bytes.&lt;/LI&gt;
&lt;/UL&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
select distinct custid into :list separated by ' ' 
  from MyPop_tbl
;
quit;
data want;
 set ..... ;
  where custid in (&amp;amp;list) ;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Note: if CUSTID is character use:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;select distinct quote(trim(custid),"'") into ...&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Or is it at least small enough to fit into a HASH object in memory?&lt;/LI&gt;
&lt;/UL&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set PCS_2211_Y
      PCS_2210_Y
      PCS_2209_Y
      PCS_2208_Y
      PCS_2207_Y
      PCS_2206_Y
      PCS_2205_Y
      PCS_2204_Y
      PCS_2203_Y
      PCS_2202_Y
      PCS_2201_Y
      PCS_2112_Y
  ;
  if _n_=1 then do;
    declare hash h(dataset:'mypop_tbl(keep=custid)');
    h.definekey('custid');
    h.definedata('custid');
    h.definedone();
  end;
  if 0=h.find();
run;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note if your dataset names had the numeric pattern at the END of the name you could use dataset lists to make the SET statement shorter.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;  set PCS_Y_2211-PCS_Y_2201 PCS_Y_2112 ;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 22 Dec 2024 23:54:39 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954454#M372757</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2024-12-22T23:54:39Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954457#M372760</link>
      <description>&lt;P&gt;Tom already gave you the Hash Table method, if you are not familiar with Hash, try the following SQL method:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data  LongFile_t ;
SET  PCS_2211_Y
     PCS_2210_Y
	 PCS_2209_Y
	 PCS_2208_Y
	 PCS_2207_Y
	 PCS_2206_Y
	 PCS_2205_Y
	 PCS_2204_Y
	 PCS_2203_Y
	 PCS_2202_Y
	 PCS_2201_Y
	 PCS_2112_Y;
Run;


proc sql;
create table want as
select * from LongFile_t
 where CustID in (select distinct CustID from MyPop_tbl);
quit;

&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 23 Dec 2024 01:49:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954457#M372760</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2024-12-23T01:49:55Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954467#M372767</link>
      <description>&lt;P&gt;Another take: if your MyPop_tbl is not too big, let us say less than 100,000 you could also convert it into a format by creating an CNTLIN data set. Use a label that cannot be confused with an ID, e.g. '##@@!!'. Then you can read your monthly files, and test whether a put of the ID with that format matches the label. If so, write it out. My experience is that working with a format is often faster than merging.&lt;/P&gt;&lt;P&gt;A simplified model below (using the long file from my other response):&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data cntlin;
fmtname='$MyPop';
label = '##@@!!';
DO start = 'D','H','Q','V';
   output;
end;
run;
proc format cntlin=cntlin;
run;

data MySelection;
set long;
if put(ID,$MyPop.)='##@@!!' then output;
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Dec 2024 09:01:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954467#M372767</guid>
      <dc:creator>ErikT_NL</dc:creator>
      <dc:date>2024-12-23T09:01:20Z</dc:date>
    </item>
    <item>
      <title>Re: set and merge</title>
      <link>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954474#M372769</link>
      <description>&lt;P&gt;One data step, no sorting:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;Data WANT;

declare hash H(dataset:"MyPop_tbl(Keep=CustID)");
H.DefineKey("CustID");
H.DefineDone();

do until(_E_);
  SET 
    PCS_2211_Y
    PCS_2210_Y
    PCS_2209_Y
    PCS_2208_Y
    PCS_2207_Y
    PCS_2206_Y
    PCS_2205_Y
    PCS_2204_Y
    PCS_2203_Y
    PCS_2202_Y
    PCS_2201_Y
    PCS_2112_Y
    END=_E_
  ;
  if 0=H.check() then output;
end;

stop;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Bart&lt;/P&gt;</description>
      <pubDate>Mon, 23 Dec 2024 09:59:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/set-and-merge/m-p/954474#M372769</guid>
      <dc:creator>yabwon</dc:creator>
      <dc:date>2024-12-23T09:59:31Z</dc:date>
    </item>
  </channel>
</rss>

