<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to remove duplicates using Proc Sql in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797043#M255815</link>
    <description>&lt;P&gt;Hi, I am new to Proc SQL, I would like to know how we can remove the duplicates from dataset and achieve the exactly same thing that I can achieve through the PROC SORT. In my example I want achieve the output exactly&amp;nbsp; looks like CLASS1 dataset using PROC SQL.&amp;nbsp; I am trying with sql&amp;nbsp; but having difficulty to achieve the same., I am able to get age, and sex but not the remaining variable. Can you please let me know what I am doing wrong. Thanks in advance.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data class;
set sashelp.class;
run;

proc sort data= class out= class1 nodupkey; by sex age ; run;

proc sql;
create table clsql as select distinct age,sex
from class  group by age,sex order by age,sex;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="SASuserlot_0-1645127590849.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/68614i1449DA4A503E9856/image-size/medium?v=v2&amp;amp;px=400" role="button" title="SASuserlot_0-1645127590849.png" alt="SASuserlot_0-1645127590849.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 17 Feb 2022 19:55:59 GMT</pubDate>
    <dc:creator>SASuserlot</dc:creator>
    <dc:date>2022-02-17T19:55:59Z</dc:date>
    <item>
      <title>How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797043#M255815</link>
      <description>&lt;P&gt;Hi, I am new to Proc SQL, I would like to know how we can remove the duplicates from dataset and achieve the exactly same thing that I can achieve through the PROC SORT. In my example I want achieve the output exactly&amp;nbsp; looks like CLASS1 dataset using PROC SQL.&amp;nbsp; I am trying with sql&amp;nbsp; but having difficulty to achieve the same., I am able to get age, and sex but not the remaining variable. Can you please let me know what I am doing wrong. Thanks in advance.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data class;
set sashelp.class;
run;

proc sort data= class out= class1 nodupkey; by sex age ; run;

proc sql;
create table clsql as select distinct age,sex
from class  group by age,sex order by age,sex;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="SASuserlot_0-1645127590849.png" style="width: 400px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/68614i1449DA4A503E9856/image-size/medium?v=v2&amp;amp;px=400" role="button" title="SASuserlot_0-1645127590849.png" alt="SASuserlot_0-1645127590849.png" /&gt;&lt;/span&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 19:55:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797043#M255815</guid>
      <dc:creator>SASuserlot</dc:creator>
      <dc:date>2022-02-17T19:55:59Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797053#M255824</link>
      <description>&lt;P&gt;Since Proc Sort will create different datasets given a different order of the data I think you need to consider and describe in much more excruciating detail what your real use case may be.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;The initial order of the data set in Proc sort with NODUPKEY will affect the result. Here is an example.&lt;/P&gt;
&lt;PRE&gt;data class;
set sashelp.class;
run;

proc sort data= class out= class1 nodupkey; 
   by sex age ; 
run;

proc sort data=class;
   by height;
run;

proc sort data= class out= class2 nodupkey; 
   by sex age ; 
run;
&lt;/PRE&gt;
&lt;P&gt;If Class1 and Class2 are the same when you run this code I would be very surprised.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;SQL has a very similar data order issue as it is not designed to process data in any given sequence. Sometimes with large data sets processing the same code on unchanged data sets will result in different results if order is considered.&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 20:23:10 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797053#M255824</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2022-02-17T20:23:10Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797064#M255828</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/13884"&gt;@ballardw&lt;/a&gt;&amp;nbsp;. Thank you for response. I am not pro with SQL,&amp;nbsp; So I am learning how I can I achieve the same that we can do by proc sort to know&amp;nbsp; how we can avoid duplicates.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;For your question, When I ran your code, I see&amp;nbsp; Class1 and Class2 are not same.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Is it possible for you provide and example where we can remove the duplicates and achieve same using proc sort and proc sql, That will&amp;nbsp; be greatly appreciated. I think class dataset may not be good .Thanks again.&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 20:38:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797064#M255828</guid>
      <dc:creator>SASuserlot</dc:creator>
      <dc:date>2022-02-17T20:38:58Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797077#M255834</link>
      <description>&lt;P&gt;You need to define a deduplication rule that precisely selects the rows you want so that it will work the same in PROC SORT and PROC SQL.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;For example with the CLASS dataset you could say: create a table that contains the tallest student for each age and sex value combination. As long as the data has unique values for height (which I think it has), you now have a precise definition that you can code&amp;nbsp;in PROC SORT and PROC SQL and you will get the same result.&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 21:07:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797077#M255834</guid>
      <dc:creator>SASKiwi</dc:creator>
      <dc:date>2022-02-17T21:07:47Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797100#M255842</link>
      <description>&lt;P&gt;Got it thank you. can you provide an example how to remove duplicates using the Proc sql, may at two variable level. I do have idea using 'distinct' for single variable level duplication removal. Thanks&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 22:02:26 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797100#M255842</guid>
      <dc:creator>SASuserlot</dc:creator>
      <dc:date>2022-02-17T22:02:26Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797107#M255847</link>
      <description>&lt;P&gt;If you want the same result in SORT and SQL, you need to design a rule for which duplicate to select, and then you can force both to implement that rule. Without forcing a specific rule, the results will be indeterminate (even in SORT, depending on storage engines).&lt;/P&gt;
&lt;P&gt;So we first need that rule.&lt;/P&gt;</description>
      <pubDate>Thu, 17 Feb 2022 22:34:31 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797107#M255847</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2022-02-17T22:34:31Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797111#M255851</link>
      <description>&lt;P&gt;Here is an SQL approach for the rule I previously described:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
  create table want as
  select A.*
  from sashelp.class as A
  inner join
  (select  age
          ,sex 
          ,max(height) as Max_Height
   from sashelp.class
   group by age
           ,sex
  ) as B 
  on A.age = B.age
  and A.sex = B.sex
  and A.height = B.Max_Height
  order by sex, age
  ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 17 Feb 2022 23:22:49 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797111#M255851</guid>
      <dc:creator>SASKiwi</dc:creator>
      <dc:date>2022-02-17T23:22:49Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove duplicates using Proc Sql</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797285#M255934</link>
      <description>&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 18 Feb 2022 17:22:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-duplicates-using-Proc-Sql/m-p/797285#M255934</guid>
      <dc:creator>SASuserlot</dc:creator>
      <dc:date>2022-02-18T17:22:17Z</dc:date>
    </item>
  </channel>
</rss>

