<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Macro-loop to look for duplicates in a dataset based by costumer and dates in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930397#M366058</link>
    <description>&lt;BLOCKQUOTE&gt;
&lt;P&gt;I know for sure that it is easy to just use nodupkey (can't remember) but I want it to be a macro-loop.&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Why?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I find it almost impossible to imagine that a macro could work faster than the NODUPKEY option in PROC SORT.&lt;/P&gt;</description>
    <pubDate>Fri, 31 May 2024 10:17:05 GMT</pubDate>
    <dc:creator>PaigeMiller</dc:creator>
    <dc:date>2024-05-31T10:17:05Z</dc:date>
    <item>
      <title>Macro-loop to look for duplicates in a dataset based by costumer and dates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930382#M366053</link>
      <description>&lt;P&gt;Hi,&lt;BR /&gt;&lt;BR /&gt;I try to generate a code that looks for duplicates in a dataset based on customer and dates, here is a random test-data I created:&lt;/P&gt;&lt;PRE&gt;&amp;nbsp;&lt;/PRE&gt;&lt;PRE&gt;&lt;CODE class=""&gt;data testdata;
    /* One group of rows per year-end date, 2014 through 2023 */
    do year = 2014 to 2023;

        date = input(cats(put(year, 4.), '1231'), yymmdd8.);

        do i = 1 to 25;
            customer = cats('Customer', (year - 2014) * 25 + i);
            output;
            /* The first 10 customers of every year are written a second time */
            if i &amp;lt;= 10 then output;
        end;
    end;
    format date yymmddn8.;
    drop i year;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;But my original data have 300,000 obs,&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;and I want to look for duplicates between year=20201231,20211231,20221231 and 20231231, only. I tried this one:.. But it takes ages since it loops around for every customer. I know for sure that it is easy to just use nodupkey (can't remember) but I want it to be a macro-loop.&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;%macro check_duplicates(date_list, dataset);
    /* For every yyyymmdd value in date_list, collect and print the rows of  */
    /* dataset that repeat a customer already seen on that date.             */
    /*   date_list : blank-separated dates written as yyyymmdd               */
    /*   dataset   : table holding the variables date and customer           */
    %local i date sasdate;

    /* Sort once into a separate table so the BY statement below is valid    */
    /* for every date and the input table itself is left as it was.          */
    proc sort data=&amp;amp;dataset out=_cd_sorted_;
        by date customer;
    run;

    %do i = 1 %to %sysfunc(countw(&amp;amp;date_list));
        %let date = %scan(&amp;amp;date_list, &amp;amp;i);
        /* Turn the yyyymmdd text into a SAS date value, missing if invalid  */
        %let sasdate = %sysfunc(inputn(&amp;amp;date, yymmdd8.));
 
        %if &amp;amp;sasdate ne . %then %do;
 
            data _duplicates_;
                set _cd_sorted_ end=last;
                where date = &amp;amp;sasdate;
                by date customer;
                retain found 0;
                /* Restart the counter for each customer within the date */
                if first.customer then count = 0;
                count + 1;
                if count &amp;gt; 1 then do;
                    found = 1;
                    put customer;
                    put date;
                    output; /* Output observation if duplicate */
                end;
                /* Written only after the last selected row, and only when   */
                /* no duplicate was flagged. Nothing is written when no row  */
                /* matches the date at all.                                  */
                if last and not found then put "No dup found for &amp;amp;date.";
                drop found;
            run;
 
            proc print data=_duplicates_ noobs;
                title "Dup found for date &amp;amp;date";
            run;
 
        %end;
        %else %put No date found for &amp;amp;date.;
 
    %end;
%mend;
 
%check_duplicates(20201231 20221231 20231231, testdata);&lt;/PRE&gt;&lt;P&gt;&lt;CODE class=""&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 31 May 2024 08:50:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930382#M366053</guid>
      <dc:creator>melhaf</dc:creator>
      <dc:date>2024-05-31T08:50:02Z</dc:date>
    </item>
    <item>
      <title>Re: Macro-loop to look for duplicates in a dataset based by costumer and dates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930388#M366056</link>
      <description>&lt;P&gt;Here is a suggestion, but not sure what the point is of the dataset _duplicates_ in your example, since it gets overwritten for each date in the list.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You could try this code, which works from the testdata dataset you create. If you want the customers who are duplicates, you can find them in the _AllDups_ table:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;%macro check_duplicates(date_list, dataset);
   /* For each yyyymmdd value in date_list, write to the log whether any    */
   /* customer occurs more than once on that date in dataset.               */
   /* The duplicated date/customer pairs are kept in the _AllDups_ table.   */
   /*   date_list : blank-separated dates written as yyyymmdd               */
   /*   dataset   : table holding the variables date and customer           */
   %local i date;

   /* Note: this sorts the input table in place */
   proc sort data=&amp;amp;dataset.; by date customer; run;

   /* Keep one row for every date/customer pair that occurs more than once */
   data _AllDups_;
      set &amp;amp;dataset.;
      by date customer;
      if first.customer then c=0;
      c+1;
      if last.customer and c&amp;gt;1 then output;
   run;

   %do i = 1 %to %sysfunc(countw(&amp;amp;date_list));
      %let date = %scan(&amp;amp;date_list, &amp;amp;i);

      %if %sysfunc(inputn(&amp;amp;date, yymmdd8.)) ne . %then %do;

         data _null_;
            dt=input("&amp;amp;date.", yymmdd8.);
            put dt= date9.;
            if _N_ = 1 then do;
               length date 8;
               /* Load the duplicate rows found above, keyed by date. The   */
               /* name must match the table created by the DATA step above. */
               declare hash stats(dataset:"_AllDups_");
               stats.defineKey('date');
               stats.defineDone();
            end;
            /* Check returns 0 when the date is present among the duplicates */
            rc=stats.Check(key:dt);
            if rc=0 then put "Dup found for &amp;amp;date.";
            else put "No dup found for &amp;amp;date.";
         run;

      %end;
      %else %put No date found for &amp;amp;date.;
   %end;

%mend;

%check_duplicates(20201231 20221231 20231231, testdata);
&lt;/PRE&gt;</description>
      <pubDate>Fri, 31 May 2024 09:29:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930388#M366056</guid>
      <dc:creator>JB1_DK</dc:creator>
      <dc:date>2024-05-31T09:29:25Z</dc:date>
    </item>
    <item>
      <title>Re: Macro-loop to look for duplicates in a dataset based by costumer and dates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930397#M366058</link>
      <description>&lt;BLOCKQUOTE&gt;
&lt;P&gt;I know for sure that it is easy to just use nodupkey (can't remember) but I want it to be a macro-loop.&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Why?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I find it almost impossible to imagine that a macro could work faster than the NODUPKEY option in PROC SORT.&lt;/P&gt;</description>
      <pubDate>Fri, 31 May 2024 10:17:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Macro-loop-to-look-for-duplicates-in-a-dataset-based-by-costumer/m-p/930397#M366058</guid>
      <dc:creator>PaigeMiller</dc:creator>
      <dc:date>2024-05-31T10:17:05Z</dc:date>
    </item>
  </channel>
</rss>

