<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to remove all the duplicated ids from one dataset according to unique ids in another data in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956775#M373546</link>
    <description>&lt;P&gt;If dataset1 is not sorted, and you want to preserve the original order of dataset1, then you can code a single-step solution by storing dataset2 in a hash object.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set dataset1;
  if _n_=1 then do;
    declare hash h (dataset:'dataset2');
      h.definekey('id');
      h.definedone();
  end;
  if h.check()^=0 then output;  /*If not found in the hash object*/
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Tue, 21 Jan 2025 17:58:34 GMT</pubDate>
    <dc:creator>mkeintz</dc:creator>
    <dc:date>2025-01-21T17:58:34Z</dc:date>
    <item>
      <title>How to remove all the duplicated ids from one dataset according to unique ids in another data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956707#M373522</link>
      <description>&lt;P&gt;Here are two simple sample datasets. Data 1 has duplicated IDs A, B, and C.&amp;nbsp;&lt;/P&gt;
&lt;TABLE dir="ltr" border="1" cellspacing="0" cellpadding="0" data-sheets-root="1" data-sheets-baot="1"&gt;&lt;COLGROUP&gt;&lt;COL width="100" /&gt;&lt;COL width="100" /&gt;&lt;/COLGROUP&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;id&lt;/TD&gt;
&lt;TD&gt;weight&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;A&lt;/TD&gt;
&lt;TD&gt;30&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;A&lt;/TD&gt;
&lt;TD&gt;36&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;21&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;25&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;30&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;40&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;41&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;43&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;40&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;Data 2 has unique IDs.&lt;/P&gt;
&lt;TABLE dir="ltr" border="1" cellspacing="0" cellpadding="0" data-sheets-root="1" data-sheets-baot="1"&gt;&lt;COLGROUP&gt;&lt;COL width="100" /&gt;&lt;/COLGROUP&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;id&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;A&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;I want to remove all duplicated ids (A) in Data 1 based on the unique ids in Data 2 (A). So the result is&lt;/P&gt;
&lt;TABLE dir="ltr" border="1" cellspacing="0" cellpadding="0" data-sheets-root="1" data-sheets-baot="1"&gt;&lt;COLGROUP&gt;&lt;COL width="100" /&gt;&lt;COL width="100" /&gt;&lt;/COLGROUP&gt;
&lt;TBODY&gt;
&lt;TR&gt;
&lt;TD&gt;id&lt;/TD&gt;
&lt;TD&gt;weight&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;21&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;25&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;B&lt;/TD&gt;
&lt;TD&gt;30&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;40&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;41&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;43&lt;/TD&gt;
&lt;/TR&gt;
&lt;TR&gt;
&lt;TD&gt;C&lt;/TD&gt;
&lt;TD&gt;40&lt;/TD&gt;
&lt;/TR&gt;
&lt;/TBODY&gt;
&lt;/TABLE&gt;
&lt;P&gt;This is a very simple example. However, my Data 1 may have thousands of duplicated IDs and Data 2 may have hundreds of unique IDs. Many thanks!&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 21 Jan 2025 00:01:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956707#M373522</guid>
      <dc:creator>SeaMoon_168</dc:creator>
      <dc:date>2025-01-21T00:01:58Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove all the duplicated ids from one dataset according to unique ids in another data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956708#M373523</link>
      <description>&lt;P&gt;Not sure how the duplicates play into this.&amp;nbsp; Sounds like you just want the observations from the first dataset where the ID is NOT in the second dataset.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If they are sorted it is a simple merge.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  merge data1 data2(in=in2);
  by id;
  if not in2;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;If not then it might be simpler to use PROC SQL (since it will sort for you).&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
create table want as 
select * from data1
where id not in (select id from data2)
;
quit;
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 21 Jan 2025 00:12:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956708#M373523</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2025-01-21T00:12:17Z</dc:date>
    </item>
    <item>
      <title>Re: How to remove all the duplicated ids from one dataset according to unique ids in another data</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956775#M373546</link>
      <description>&lt;P&gt;If dataset1 is not sorted, and you want to preserve the original order of dataset1, then you can code a single-step solution by storing dataset2 in a hash object.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
  set dataset1;
  if _n_=1 then do;
    declare hash h (dataset:'dataset2');
      h.definekey('id');
      h.definedone();
  end;
  if h.check()^=0 then output;  /*If not found in the hash object*/
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 21 Jan 2025 17:58:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-remove-all-the-duplicated-ids-from-one-dataset-according/m-p/956775#M373546</guid>
      <dc:creator>mkeintz</dc:creator>
      <dc:date>2025-01-21T17:58:34Z</dc:date>
    </item>
  </channel>
</rss>

