<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: compare each line of a data table 2 by 2 in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121992#M24990</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;What kind of comparison are you looking for? And do you actually mean compare the first row with all of 220,000 rows?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If so, I'm afraid that what you're describing is going to create on the order of 220000! (factorial) comparisons and isn't likely to finish in any reasonable time.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Mon, 21 May 2012 14:58:33 GMT</pubDate>
    <dc:creator>ballardw</dc:creator>
    <dc:date>2012-05-21T14:58:33Z</dc:date>
    <item>
      <title>compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121991#M24989</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I would like to compare each line of a data table 2 by 2.&lt;/P&gt;&lt;P&gt;(the first obs with all the following, then the second with all the following, etc.)&lt;/P&gt;&lt;P&gt;I found a way to do it with 2 do loops in a macro but it's too slow&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I had in mind to make something like this but it doesn't work since I have a limited understanding of how data steps work&lt;/P&gt;&lt;P&gt;I'm not even sure it's the best way to achieve what I need, so any other idea is welcome&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let WorkingTable = MyTable&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let nobs=20; /* simplified, number of observations in &amp;amp;WorkingTable = ~220 000 */&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let k=Gender; /* simplified, should be more than one variable in the end : =Gender Postcode */&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; data _null_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; set &amp;amp;WorkingTable;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; %let i=_N_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; call symputx('m', &amp;amp;k);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; data _null_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; set &amp;amp;WorkingTable (firstobs=&amp;amp;i);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; call symputx('n', &amp;amp;k);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if &amp;amp;m=&amp;amp;n then putlog 'Ok!';&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Feel free to ask if I'm not clear enough.&lt;/P&gt;&lt;P&gt;Thanks for your help&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 21 May 2012 12:28:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121991#M24989</guid>
      <dc:creator>mathias</dc:creator>
      <dc:date>2012-05-21T12:28:30Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121992#M24990</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;What kind of comparison are you looking for? And do you actually mean compare the first row with all of 220,000 rows?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If so, I'm afraid that what you're describing is going to create on the order of 220000! (factorial) comparisons and isn't likely to finish in any reasonable time.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Mon, 21 May 2012 14:58:33 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121992#M24990</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2012-05-21T14:58:33Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121993#M24991</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;It looks like you are looking for:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;data _null_;
 set sashelp.class(keep=name);
 do i=_n_ to nobs;
&amp;nbsp; set sashelp.class(keep=name rename=(name=_name)) nobs=nobs point=i;
&amp;nbsp; put _all_;
 end;
run;

&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 22 May 2012 04:54:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121993#M24991</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-05-22T04:54:19Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121994#M24992</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;Thanks for taking the time to help me&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;gt; &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;What kind of comparison are you looking for?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;I want to search for duplicated rows, the only way is to compare each row with each other (proc sort nodupkey does not allow nearby dates to be considered as equal)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;gt; &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;And do you actually mean compare the first row with all of 220,000 rows?&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="background-color: #ffffff; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;Yes, (&lt;/SPAN&gt;&lt;SPAN style="background-color: #ffffff; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;The first with the 219 999 others rows; then the second with the 219 998 others etc.)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;gt; &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;If so, I'm afraid that what you're describing is going to create on the orde&lt;/SPAN&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: 
#ffffff;"&gt;r of 220000! (factorial) comparisons&lt;/SPAN&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt; and isn't likely to finish in any reasonable time.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Complexity should be &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;O(n²) not &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;O(n!), so it should be feasible in less than a day I think.&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="background-color: #ffffff; font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;Either way, not all the tables I need to run are so big, this is an exception.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;gt;data _null_;&lt;/P&gt;&lt;P&gt;&amp;gt; set sashelp.class(keep=name);&lt;/P&gt;&lt;P&gt;&amp;gt; do i=_n_ to nobs;&lt;/P&gt;&lt;P&gt;&amp;gt;&amp;nbsp; set sashelp.class(keep=name rename=(name=_name)) nobs=nobs point=i;&lt;/P&gt;&lt;P&gt;&amp;gt;&amp;nbsp; put _all_;&lt;/P&gt;&lt;P&gt;&amp;gt; end;&lt;/P&gt;&lt;P&gt;&amp;gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;That's it I think,&lt;/P&gt;&lt;P&gt;Crashed with nobs = 5000 but that's another issue&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you !&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 22 May 2012 09:15:21 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121994#M24992</guid>
      <dc:creator>mathias</dc:creator>
      <dc:date>2012-05-22T09:15:21Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121995#M24993</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I don't understand what you mean when you refer to Crashed with nobs = 5000 ?&lt;/P&gt;&lt;P&gt;If you want faster, Hash Table is a good choice.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;data class1;
 set sashelp.class(keep=name);
 k+1;
run;
data class2;
 set sashelp.class(keep=name rename=(name=_name));
 _k+1;
run;


%let dsid=%sysfunc(open(class1));
%let nobs=%sysfunc(attrn(&amp;amp;dsid,nobs));
%let dsid=%sysfunc(close(&amp;amp;dsid));
data want;
if 0 then set class1;
 declare hash ha1(dataset:'class1',ordered:'Y');
&amp;nbsp; ha1.definekey('k');
&amp;nbsp; ha1.definedata('name');
&amp;nbsp; ha1.definedone();
if 0 then set class2;
 declare hash ha2(dataset:'class2',ordered:'Y');
&amp;nbsp; ha2.definekey('_k');
&amp;nbsp; ha2.definedata('_name');
&amp;nbsp; ha2.definedone();

do k=1 to &amp;amp;nobs; 
 ha1.find();
 do _k=k to &amp;amp;nobs;
&amp;nbsp; ha2.find();output;
 end;
end;
stop;
run;


&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 23 May 2012 02:37:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121995#M24993</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-05-23T02:37:25Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121996#M24994</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;&amp;gt; &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;I don't understand what you mean when you refer to Crashed with nobs = 5000 ?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;I always try the algorithm first on little tables (nobs=5) if it works, I increase it step by step to have a better feeling on running times&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;nobs=50, nobs=500 took less than a minute but nobs 5000 crashed (in fact i found out later that the crash occurs between nobs = 1400 and 1500)&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;gt; &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;If you want faster, Hash Table is a good choice.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;much much faster as I can see.&lt;/P&gt;&lt;P&gt;Crash occurs between 15 000 and 16 000 this time (lack of RAM I think)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have a few questions&lt;/P&gt;&lt;P&gt;-Is it necessary to create two same tables in the beginning?&lt;/P&gt;&lt;P&gt;-WANT is growing too big, is there a way to still have the same computation without storing it in a table and thus in RAM memory ?&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 23 May 2012 07:53:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121996#M24994</guid>
      <dc:creator>mathias</dc:creator>
      <dc:date>2012-05-23T07:53:19Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121997#M24995</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;"Crash occurs between 15 000 and 16 000 this time"&lt;/P&gt;&lt;P&gt;Did you mean SAS session stop by accident ? But for a dataset only containing 22,000 obs ,that is not a problem I think.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"-Is it necessary to create two same tables in the beginning?"&lt;/P&gt;&lt;P&gt;It isn't necessary. You can do the same thing in the code when building the Hash Table.&lt;/P&gt;&lt;P&gt;I do this to try to enhance the code's readability . and it is very convenient if the dataset is not too big.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"-WANT is growing too big, is there a way to still have the same computation without storing it in a table and thus in RAM memory ?"&lt;/P&gt;&lt;P&gt;Yes. It is up to what you want, you can output obs you need and discard others .&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 23 May 2012 08:51:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121997#M24995</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-05-23T08:51:54Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121998#M24996</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&amp;gt; "Did you mean SAS session stop by accident ?"&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;No, simply nothing happens.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;When you look the Physical Memory Usage, you see that nobs=16k needs more than 6Gb of RAM (cf. screenshot)&lt;IMG alt="2012-05-23_093032.png" class="jive-image-thumbnail jive-image" src="https://communities.sas.com/legacyfs/online/2079_2012-05-23_093032.png" width="450" /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;&amp;gt; "&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;Yes. It is up to what you want, you can output obs you need and discard others .&lt;/SPAN&gt;"&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif;"&gt;Thanks, I'll try to play with that and come back if I can't find it out by myself&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 23 May 2012 12:00:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121998#M24996</guid>
      <dc:creator>mathias</dc:creator>
      <dc:date>2012-05-23T12:00:17Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121999#M24997</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;mathias,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I'm going to build upon KSharp's first solution.&amp;nbsp; If these ideas work for you, maybe he can convert it to a hash version.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;First, you have to incorporate the idea that it is not necessary to output every combination.&amp;nbsp; The same DATA step that reads in all the pairs can also perform some (or all) of the comparison work, and only output the matching pairs.&amp;nbsp; Second, you have to account for the fact that both observations contain exactly the same variable names.&amp;nbsp; For comparison purposes, you'll have to rename one set of variables.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is a program that incorporates some of those ideas:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data match;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; set sashelp.class nobs=_nobs_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; if _n_ &amp;lt; _nobs_ then do _i_ = _n_ + 1 to _nobs_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; set sashelp.class (keep=name rename=(name=name2)) point=_i_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if name=name2 then do;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; original_observation = _n_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; matching_observation = _i_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; set sashelp.class point=_i_;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; output;&lt;BR /&gt;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; drop name2;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The idea is to output pairs that match, as two separate observations, using the original variable names.&amp;nbsp; 
Each observation of the pair contains the new variables ORIGINAL_OBSERVATION and MATCHING_OBSERVATION.&amp;nbsp; In the sample program, I'm comparing on NAME only, but you can use more than one variable to narrow down potential matches.&amp;nbsp; The bottleneck will be speed, but in theory this will chug along without other resource constraints.&amp;nbsp; As you noted, the number of comparisons is proportional to the square of the number of observations.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 23 May 2012 14:03:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/121999#M24997</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2012-05-23T14:03:19Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/122000#M24998</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Oh. That doesn't matter, as far as SAS session doesn't stop by accident. That is OK. You can run it as long as you can.&lt;/P&gt;&lt;P&gt;BTW, I recode it to make my code better ,faster, more succinct ........&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;data class;
 set sashelp.class(keep=name);
 k+1;
run;



data want;
if 0 then set class;
 declare hash ha1(dataset:'class',ordered:'Y');
 declare hiter hi1('ha1');
&amp;nbsp; ha1.definekey('k');
&amp;nbsp; ha1.definedata('k','name');
&amp;nbsp; ha1.definedone();
if 0 then set class(rename=(name=_name));
 declare hash ha2(dataset:'class(rename=(name=_name))',ordered:'Y');
 declare hiter hi2('ha2');
&amp;nbsp; ha2.definekey('k');
&amp;nbsp; ha2.definedata('_name');
&amp;nbsp; ha2.definedone();

do while(hi1.next()=0) ;
 rc=hi2.setcur(); 
 do while(rc=0);
&amp;nbsp; output;
&amp;nbsp; rc=hi2.next(); 
 end;
end; 
stop;
run;


&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ksharp&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 24 May 2012 03:35:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/122000#M24998</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2012-05-24T03:35:30Z</dc:date>
    </item>
    <item>
      <title>Re: compare each line of a data table 2 by 2</title>
      <link>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/122001#M24999</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;I almost got what I want today, thanks all for your help&lt;/P&gt;&lt;P&gt;I just need to automate a bit more and to find the best output.&lt;/P&gt;&lt;P&gt;Without output or computations, I calculated empirically that it should take about 55 min to go through 200k observations.&lt;/P&gt;&lt;P&gt;Reasonable I think, but going through the whole table at once isn't necessary so I still have optimization options available.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is my code :&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%let WorkingTable = DUPLICATES_TABLE;&lt;/P&gt;&lt;P&gt;%let IntervalDays = 70;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%let vars1 = Id Germ Gender PostCode DateOfDiagnosis DateOfBirth;&lt;/P&gt;&lt;P&gt;%let vars2 = 'Id','Germ','Gender','PostCode','DateOfDiagnosis','DateOfBirth';&lt;/P&gt;&lt;P&gt;%let vars3 = Id=_Id Germ=_Germ Gender=_Gender PostCode=_PostCode DateOfDiagnosis=_DateOfDiagnosis DateOfBirth=_DateOfBirth;&lt;/P&gt;&lt;P&gt;%let vars4 = '_Id','_Germ','_Gender','_PostCode','_DateOfDiagnosis','_DateOfBirth';&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* define class for hash table */&lt;/P&gt;&lt;P&gt;&amp;nbsp; data class;&lt;/P&gt;&lt;P&gt;&amp;nbsp; set &amp;amp;WorkingTable (keep=&amp;amp;vars1);&lt;/P&gt;&lt;P&gt;&amp;nbsp; k+1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* compute nobs */&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let dsid=%sysfunc(open(class));&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let nobs=%sysfunc(attrn(&amp;amp;dsid,nobs));&lt;/P&gt;&lt;P&gt;&amp;nbsp; %let dsid=%sysfunc(close(&amp;amp;dsid));&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp; /* run comparison loops */&lt;/P&gt;&lt;P&gt;&amp;nbsp; data comparison (drop=rc k);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if 0 then set class;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; declare 
hash ha1(dataset:'class',ordered:'Y');&amp;nbsp;&amp;nbsp; *Définir hash table;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; declare hiter hi1('ha1');&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *Définir hash iterator;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha1.definekey('k');&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *Définir la variable identifiante;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha1.definedata('k',&amp;amp;vars2);&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *Définir les autres variables;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha1.definedone();&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *Ecriture de la structure de la table;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; if 0 then set class(rename=(&amp;amp;vars3));&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; declare hash ha2(dataset:"class(rename=(&amp;amp;vars3))",ordered:'Y');&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; declare hiter hi2('ha2');&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha2.definekey('k');&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha2.definedata(&amp;amp;vars4);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; ha2.definedone();&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; do while(hi1.next()=0);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; rc=hi2.setcur(key : k+1); *demarre l'iteration de la seconde loop a 
i+1;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; do while(rc=0);&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *if Gender= _Gender then put k= Gender= _Gender=;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *if Gender= _Gender then output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *put k= Gender= _Gender=;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; *output;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; if (DateOfBirth=_DateOfBirth) AND (Gender=_Gender) AND (PostCode=_PostCode) AND (&amp;amp;GermVar= _&amp;amp;GermVar) AND (abs(DateOfDiagnosis - _DateOfDiagnosis) &amp;lt; &amp;amp;IntervalDays) then output;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; rc=hi2.next();&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; end;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; stop;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; run; * no ouput, nobs=40k -&amp;gt; 2:06 min;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 29 May 2012 15:12:36 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/compare-each-line-of-a-data-table-2-by-2/m-p/122001#M24999</guid>
      <dc:creator>mathias</dc:creator>
      <dc:date>2012-05-29T15:12:36Z</dc:date>
    </item>
  </channel>
</rss>

