<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to delete duplicate observations by another variable in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841868#M332881</link>
    <description>&lt;P&gt;I'm confused.&amp;nbsp; You mention 'per participant' but your code doesn't reference a ParticipantID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Does your data have ParticipantID, StudyID, CareDate?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can you show, say, 10 sample records of the data you have (with duplicates) and the data you want after de-duplicating?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It's not clear to me if you want the output to be one record per StudyID or one record per ParticipantID.&lt;/P&gt;</description>
    <pubDate>Tue, 01 Nov 2022 13:53:05 GMT</pubDate>
    <dc:creator>Quentin</dc:creator>
    <dc:date>2022-11-01T13:53:05Z</dc:date>
    <item>
      <title>How to delete duplicate observations by another variable</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841853#M332876</link>
      <description>&lt;P&gt;Hello,&lt;BR /&gt;&lt;BR /&gt;I am trying to figure out how to delete duplicate observations in my data set based on another variable.&lt;/P&gt;&lt;P&gt;I have multiple 'studyID's' per participant but I only want to keep the most recent one based on the 'caredate'.&amp;nbsp; Here is my code, but my results are still showing multiple obs per ID if the 'caredate' is different.&amp;nbsp; Please advise.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;proc sort data= PPD;&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;by CAREDATE;&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;Run;&lt;BR /&gt;&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;proc sort data= PPD&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;Dupout=PPD_Dupobs&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;Nodupkey;&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;by STUDYID;&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;Run;&lt;BR /&gt;&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;PROC PRINT DATA=PPD (Obs=100);&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for your help!&lt;/P&gt;</description>
      <pubDate>Tue, 01 Nov 2022 13:16:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841853#M332876</guid>
      <dc:creator>mherrick</dc:creator>
      <dc:date>2022-11-01T13:16:54Z</dc:date>
    </item>
    <item>
      <title>Re: How to delete duplicate observations by another variable</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841854#M332877</link>
      <description>&lt;P&gt;Here is a classic way to do this with Proc SQL&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input studyID $ caredate :date9.;
format caredate date9.;
datalines;
1 01jan2014 
1 01jan2015 
1 01jan2016 
2 01jan2017 
2 01jan2018 
2 01jan2019 
3 01jan2020 
3 01jan2021 
3 01jan2022 
;

proc sql;
   create table want as
   select * from have
   group by studyID
   having max(caredate) = caredate
   ;
quit;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;U&gt;&lt;STRONG&gt;Result:&lt;/STRONG&gt;&lt;/U&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;studyID caredate
1       01JAN2016
2       01JAN2019
3       01JAN2022&lt;/PRE&gt;</description>
      <pubDate>Tue, 01 Nov 2022 13:23:53 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841854#M332877</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2022-11-01T13:23:53Z</dc:date>
    </item>
    <item>
      <title>Re: How to delete duplicate observations by another variable</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841855#M332878</link>
      <description>&lt;P&gt;If your input dataset is unsorted, first sort BY StudyID and DESCENDING CareDate.&lt;/P&gt;
&lt;P&gt;Then run a second sort with NODUPKEY, using only BY StudyID.&lt;/P&gt;</description>
      <pubDate>Tue, 01 Nov 2022 13:22:34 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841855#M332878</guid>
      <dc:creator>LinusH</dc:creator>
      <dc:date>2022-11-01T13:22:34Z</dc:date>
    </item>
    <item>
      <title>Re: How to delete duplicate observations by another variable</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841856#M332879</link>
      <description>&lt;P&gt;Or you can do this if you want to make sure that you only have 1 obs per studyID, regardless of ties.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sort data = have;
   by studyID caredate;
run;

data want;
   set have;
   by studyID caredate;
   if last.studyID;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 01 Nov 2022 13:23:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841856#M332879</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2022-11-01T13:23:04Z</dc:date>
    </item>
    <item>
      <title>Re: How to delete duplicate observations by another variable</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841868#M332881</link>
      <description>&lt;P&gt;I'm confused.&amp;nbsp; You mention 'per participant' but your code doesn't reference a ParticipantID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Does your data have ParticipantID, StudyID, CareDate?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Can you show, say, 10 sample records of the data you have (with duplicates) and the data you want after de-duplicating?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;It's not clear to me if you want the output to be one record per StudyID or one record per ParticipantID.&lt;/P&gt;</description>
      <pubDate>Tue, 01 Nov 2022 13:53:05 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-delete-duplicate-observations-by-another-variable/m-p/841868#M332881</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2022-11-01T13:53:05Z</dc:date>
    </item>
  </channel>
</rss>

