<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Checking duplicates in sas in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Checking-duplicates-in-sas/m-p/863350#M341049</link>
    <description>&lt;P&gt;I have a dataset with Medical appointments taken from the Kaggle website.&amp;nbsp;&lt;/P&gt;&lt;P&gt;The dataset I am using contains 110527 medical appointments and their 14 associated variables ( PatientId, AppointmentID, Gender, ScheduledDay, AppointmentDay, Age, Neighbourhood, Scholarship, Hypertension, Diabetes, Alcoholism, Handcap, SMS_received, No-show ).&amp;nbsp; The original analysis was done using Python and it states that there is no duplicate value in the data.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to use SAS to find duplicate entries in the dataset and confirm the claim in the original analysis i.e there is no duplicate entry because this is appointment data that may have multiple entries for the same individual but on different dates.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried using the Proc Sort procedure&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;BR /&gt;proc sort data=eda_project1 nodupkey dupeout=dupes;&lt;BR /&gt;by patientid;&lt;BR /&gt;run;&lt;/PRE&gt;&lt;P&gt;but there are so many entries sorted out in dupes. Can someone guide me on how I can confirm if there are any duplicates or not?&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 10 Mar 2023 04:27:09 GMT</pubDate>
    <dc:creator>a_matharu</dc:creator>
    <dc:date>2023-03-10T04:27:09Z</dc:date>
    <item>
      <title>Checking duplicates in sas</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Checking-duplicates-in-sas/m-p/863350#M341049</link>
      <description>&lt;P&gt;I have a dataset with Medical appointments taken from the Kaggle website.&amp;nbsp;&lt;/P&gt;&lt;P&gt;The dataset I am using contains 110527 medical appointments and their 14 associated variables ( PatientId, AppointmentID, Gender, ScheduledDay, AppointmentDay, Age, Neighbourhood, Scholarship, Hypertension, Diabetes, Alcoholism, Handcap, SMS_received, No-show ).&amp;nbsp; The original analysis was done using Python and it states that there is no duplicate value in the data.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am trying to use SAS to find duplicate entries in the dataset and confirm the claim in the original analysis i.e there is no duplicate entry because this is appointment data that may have multiple entries for the same individual but on different dates.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried using the Proc Sort procedure&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;BR /&gt;proc sort data=eda_project1 nodupkey dupeout=dupes;&lt;BR /&gt;by patientid;&lt;BR /&gt;run;&lt;/PRE&gt;&lt;P&gt;but there are so many entries sorted out in dupes. Can someone guide me on how I can confirm if there are any duplicates or not?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 04:27:09 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Checking-duplicates-in-sas/m-p/863350#M341049</guid>
      <dc:creator>a_matharu</dc:creator>
      <dc:date>2023-03-10T04:27:09Z</dc:date>
    </item>
    <item>
      <title>Re: Checking duplicates in sas</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Checking-duplicates-in-sas/m-p/863356#M341051</link>
      <description>&lt;P&gt;Include appointmentday in your BY statement.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 06:01:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Checking-duplicates-in-sas/m-p/863356#M341051</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2023-03-10T06:01:12Z</dc:date>
    </item>
  </channel>
</rss>

