<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: merge all IDs from 3 files , anyway to avoid sorting (files are large) in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265245#M52136</link>
    <description>&lt;P&gt;&lt;FONT face="comic sans ms,sans-serif"&gt;Once you have the id_list table, you can easily do left joins to create flags for the sources.&amp;nbsp; I prefer the flag(0,1) implementation because it makes it easy to sum them to get counts:&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="courier new,courier"&gt;proc sql;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select count(*) as cnt &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; from id_list;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;create table id_summary as &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select a.id, &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b1.id) then 0 else 1 end as tbl1_flag 'tbl1 flag', &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b2.id) then 0 else 1 end as tbl2_flag 'tbl2 flag', &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b3.id) then 0 else 1 end as tbl3_flag 'tbl3 flag',&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl1_flag + &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl2_flag + &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl3_flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; as tbl_cnt &amp;nbsp; 'tbl cnt' &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_list A&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl1 B1 on b1.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl2 B2 on b2.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl3 B3 on b3.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select tbl_cnt, count(*) as cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_summary&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;group&amp;nbsp; by tbl_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select tbl1_flag, tbl2_flag, tbl3_flag, count(*) as cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_summary&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;group&amp;nbsp; by tbl1_flag, tbl2_flag, tbl3_flag&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select&amp;nbsp; sum(tbl1_flag) as tbl1_cnt,&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum(tbl2_flag) as tbl2_cnt,&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum(tbl3_flag) as tbl3_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp;&amp;nbsp; id_summary;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;/*&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;--------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;4400000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;tbl cnt&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp; 2800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 2&amp;nbsp;&amp;nbsp; 1400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl3&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;--------------------------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp; 800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp; 1600000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp; 800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;tbl1_cnt&amp;nbsp; tbl2_cnt&amp;nbsp; tbl3_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;----------------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;3000000&amp;nbsp;&amp;nbsp; 2000000&amp;nbsp;&amp;nbsp; 1200000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;*/&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 20 Apr 2016 21:45:45 GMT</pubDate>
    <dc:creator>Pamela_JSRCC</dc:creator>
    <dc:date>2016-04-20T21:45:45Z</dc:date>
    <item>
      <title>merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265202#M52125</link>
      <description>&lt;P&gt;I have 3 files, all have ID variables. The IDs are in the same format accross the 3 files. They can appear in only 1 file, or 2 files, or 3 files. In each file they are unique (no duplicate), but not sorted. I want to get all unique IDs from the 3 files, only the ID variable is enough.&lt;/P&gt;
&lt;P&gt;The files are large, so the best is to avoid proc sort. Any suggestions?&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 19:12:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265202#M52125</guid>
      <dc:creator>fengyuwuzu</dc:creator>
      <dc:date>2016-04-20T19:12:18Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265213#M52128</link>
      <description>&lt;P&gt;I couldn't program this fast and easily enough, but I know there are those watching who can.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Create a hash table based on ID.&amp;nbsp; There would be one variable in the data portion of the hash table, which would work as follows.&amp;nbsp; When loading from each data source, add 1 to the data portion that matches the ID.&amp;nbsp; At the end, unload any ID values that have a data portion of 1.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Note that using a counter doesn't tell you where the ID came from.&amp;nbsp; If you need that information (other than just the fact that the ID was unique), you could increment by 1, 2, and 4 (or by 1, 10, and 100) instead of adding 1 each time.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Of course, all of this depends on having enough memory to store the hash table.&amp;nbsp; So it may depend on your definition of a "large" data set.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;EDITED AFTER THE FACT:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I'm not sure I understood what you meant by getting "unique IDs".&amp;nbsp; I was trying to get those IDs that appeared in only one of the three data sets.&amp;nbsp; It could easily be that you meant to get a complete list of all IDs, but with no duplicates.&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 21:12:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265213#M52128</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2016-04-20T21:12:49Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265220#M52129</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/56807"&gt;@fengyuwuzu﻿&lt;/a&gt;,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;When it comes to performance questions, many technical details can be important. So, I guess it would be interesting (for the experts in this matter out there) to know:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;1. How many observations (in millions) do the three files (SAS datasets, or?) have?&lt;BR /&gt;2. What are type, length and typical values of the ID variable?&lt;BR /&gt;3. Do indexes exist for one or more of the three files (see PROC CONTENTS output)?&lt;BR /&gt;4. Do you have an idea how large the overlaps between the sets of IDs will be?&lt;BR /&gt;5. Any hardware limitations (RAM, free disk space) that may be important?&lt;BR /&gt;6. What are you planning to do with the dataset containing the unique IDs?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 20:20:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265220#M52129</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2016-04-20T20:20:25Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265222#M52131</link>
      <description>&lt;P&gt;&lt;FONT face="comic sans ms,sans-serif"&gt;Using proc sql and doing a union is probably one of the easiest ways.&amp;nbsp; I created 3 tables with IDs, scrambled them and then created another table with the just the unique IDs.&amp;nbsp; From the 6.2 million rows in the original tables, there were only 4.4 million unique IDs created.&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;data tbl1 (keep=id skey);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; do i = 1 to 3000000; &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; id&amp;nbsp;&amp;nbsp; = (i*2);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; skey = rand('Uniform');&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; end;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;run;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;data tbl2 (keep=id skey);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; do i = 1 to 2000000; &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; id&amp;nbsp;&amp;nbsp; = (i*3);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; skey = rand('Uniform');&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; end;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;run;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;data tbl3 (keep=id skey);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; do i = 1 to 1200000; &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; id&amp;nbsp;&amp;nbsp; = (i*5);&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; skey = rand('Uniform');&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp; end;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;run;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;proc sort data=tbl1; by skey; run;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;proc sort data=tbl2; by skey; run;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;proc sort data=tbl3; by skey; run;&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;proc sql stimer;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;create table id_list as &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; select distinct id from tbl1&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;union select distinct id from tbl2&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;union select distinct id from tbl3;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;/*&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;NOTE: Table WORK.ID_LIST created, with 4400000 rows and 1 columns.&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;NOTE: SQL Statement used (Total process time):&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; real time&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3.31 seconds&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cpu time&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 4.88 seconds&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;*/&lt;/FONT&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 20:28:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265222#M52131</guid>
      <dc:creator>Pamela_JSRCC</dc:creator>
      <dc:date>2016-04-20T20:28:17Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265227#M52133</link>
      <description>&lt;P&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13994"&gt;@Pamela_JSRCC﻿&lt;/a&gt;: Good suggestion! I think it would save time to omit the DISTINCT keyword. The result of the UNION operator is free of duplicates anyway.&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 20:46:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265227#M52133</guid>
      <dc:creator>FreelanceReinh</dc:creator>
      <dc:date>2016-04-20T20:46:31Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265236#M52134</link>
      <description>&lt;P&gt;Thank you for all replies.&lt;/P&gt;
&lt;P&gt;One of my file is about 80G with &amp;gt;750 million rows. But I decided to extract the ID column only which makes the file much smaller.&lt;/P&gt;
&lt;P&gt;With the ID_only files, proc sort is not a problem any more. (my ID is characteric with length 44)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Yes, I wanted to add one column indicating source, like AB showing the ID exists in both A and D sets; ABC indicating in all 3 sets.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you all again!&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 21:14:18 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265236#M52134</guid>
      <dc:creator>fengyuwuzu</dc:creator>
      <dc:date>2016-04-20T21:14:18Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265245#M52136</link>
      <description>&lt;P&gt;&lt;FONT face="comic sans ms,sans-serif"&gt;Once you have the id_list table, you can easily do left joins to create flags for the sources.&amp;nbsp; I prefer the flag(0,1) implementation because it makes it easy to sum them to get counts:&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;FONT face="courier new,courier"&gt;proc sql;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select count(*) as cnt &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; from id_list;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;create table id_summary as &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select a.id, &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b1.id) then 0 else 1 end as tbl1_flag 'tbl1 flag', &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b2.id) then 0 else 1 end as tbl2_flag 'tbl2 flag', &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; case when missing(b3.id) then 0 else 1 end as tbl3_flag 'tbl3 flag',&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl1_flag + &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl2_flag + &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; calculated tbl3_flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; as tbl_cnt &amp;nbsp; 'tbl cnt' &lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_list A&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl1 B1 on b1.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl2 B2 on b2.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;left join tbl3 B3 on b3.id = a.id&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select tbl_cnt, count(*) as cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_summary&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;group&amp;nbsp; by tbl_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select tbl1_flag, tbl2_flag, tbl3_flag, count(*) as cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp; id_summary&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;group&amp;nbsp; by tbl1_flag, tbl2_flag, tbl3_flag&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;select&amp;nbsp; sum(tbl1_flag) as tbl1_cnt,&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum(tbl2_flag) as tbl2_cnt,&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; sum(tbl3_flag) as tbl3_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;from&amp;nbsp;&amp;nbsp;&amp;nbsp; id_summary;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;/*&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;--------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;4400000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;tbl cnt&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp; 2800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 2&amp;nbsp;&amp;nbsp; 1400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 3&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; tbl3&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; flag&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;--------------------------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp; 800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp; 1600000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 400000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 0&amp;nbsp;&amp;nbsp;&amp;nbsp; 800000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 1&amp;nbsp;&amp;nbsp;&amp;nbsp; 200000&lt;/FONT&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;tbl1_cnt&amp;nbsp; tbl2_cnt&amp;nbsp; tbl3_cnt&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;----------------------------&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;&amp;nbsp;3000000&amp;nbsp;&amp;nbsp; 2000000&amp;nbsp;&amp;nbsp; 1200000&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT face="courier new,courier"&gt;*/&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 20 Apr 2016 21:45:45 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265245#M52136</guid>
      <dc:creator>Pamela_JSRCC</dc:creator>
      <dc:date>2016-04-20T21:45:45Z</dc:date>
    </item>
    <item>
      <title>Re: merge all IDs from 3 files , anyway to avoid sorting (files are large)</title>
      <link>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265435#M52195</link>
      <description>Thank you!!  This is really a brilliant idea. I learned a lot again.</description>
      <pubDate>Thu, 21 Apr 2016 14:33:57 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/merge-all-IDs-from-3-files-anyway-to-avoid-sorting-files-are/m-p/265435#M52195</guid>
      <dc:creator>fengyuwuzu</dc:creator>
      <dc:date>2016-04-21T14:33:57Z</dc:date>
    </item>
  </channel>
</rss>

