<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to Sort Out Certain Duplicates in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687531#M208723</link>
    <description>&lt;P&gt;Assuming that email is held in variable email, and the staff student is held in a column called staff_student.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;PROC SQL;
  CREATE TABLE part1 AS SELECT
  *, count(distinct(staff_student)) as nrc
  FROM have
  GROUP BY email;
QUIT;
DATA part2;
  SET part1;
  IF nrc=2 THEN staff_student="Staff/Student";
  DROP nrc;
RUN;
PROC SQL;
  CREATE TABLE want AS SELECT
  distinct *
  FROM part2;
QUIT;&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;You would then end with a record labeled Staff/Student for these, assuming these are the only things that are different in the record. &lt;/P&gt;</description>
    <pubDate>Tue, 29 Sep 2020 14:38:08 GMT</pubDate>
    <dc:creator>SwissC</dc:creator>
    <dc:date>2020-09-29T14:38:08Z</dc:date>
    <item>
      <title>How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687526#M208720</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a dataset where I am trying to sort out specific duplicates. I am creating an email list from an appended dataset and because certain people are labeled as both staff and students, we have duplicate records. Here is one such case:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="dpachorek_0-1601389103979.png" style="width: 674px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/49934i79E409FD346FB82A/image-dimensions/674x59?v=v2" width="674" height="59" role="button" title="dpachorek_0-1601389103979.png" alt="dpachorek_0-1601389103979.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="font-family: inherit;"&gt;This is not the case for everyone but I am trying to sort out the duplicate records that are labeled as student since their staff label takes precedence.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any help? Thanks!&lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 14:27:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687526#M208720</guid>
      <dc:creator>dpachorek</dc:creator>
      <dc:date>2020-09-29T14:27:47Z</dc:date>
    </item>
    <item>
      <title>Re: How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687531#M208723</link>
      <description>&lt;P&gt;Assuming that email is held in variable email, and the staff student is held in a column called staff_student.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;PROC SQL;
  CREATE TABLE part1 AS SELECT
  *, count(distinct(staff_student)) as nrc
  FROM have
  GROUP BY email;
QUIT;
DATA part2;
  SET part1;
  IF nrc=2 THEN staff_student="Staff/Student";
  DROP nrc;
RUN;
PROC SQL;
  CREATE TABLE want AS SELECT
  distinct *
  FROM part2;
QUIT;&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;You would then end with a record labeled Staff/Student for these, assuming these are the only things that are different in the record. &lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 14:38:08 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687531#M208723</guid>
      <dc:creator>SwissC</dc:creator>
      <dc:date>2020-09-29T14:38:08Z</dc:date>
    </item>
    <item>
      <title>Re: How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687539#M208727</link>
      <description>&lt;P&gt;You're right that the emails are under a variable called email. However, staff and student are part of a variable called group that has 4 options (Student, Staff, Faculty, and Lib_Faculty).&lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 15:01:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687539#M208727</guid>
      <dc:creator>dpachorek</dc:creator>
      <dc:date>2020-09-29T15:01:28Z</dc:date>
    </item>
    <item>
      <title>Re: How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687542#M208730</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA part1;
  SET have;
  IF group in("Staff" "Student") THEN cnt=1;
    ELSE cnt=0;
RUN;

PROC SQL;
  CREATE TABLE part2 AS SELECT
  *, sum(cnt) as nrc
  FROM part1
  GROUP BY email;
QUIT;
DATA part3;
  SET part2;
  IF nrc=2 THEN group="Staff/Student";
  DROP nrc cnt;
RUN;
PROC SQL;
  CREATE TABLE want AS SELECT
  distinct *
  FROM part3;
QUIT;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Does this fix it?&lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 15:11:03 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687542#M208730</guid>
      <dc:creator>SwissC</dc:creator>
      <dc:date>2020-09-29T15:11:03Z</dc:date>
    </item>
    <item>
      <title>Re: How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687546#M208731</link>
      <description>&lt;P&gt;Another option would be to transpose the data.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;DATA part1;
  SET have;
  x=1;
RUN;

PROC TRANSPOSE data=part1 out=want;
  BY email /* add all other variables except group and x */;
  ID group;
  VAR x;
RUN;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This would then give a dataset with flags for each department and is probably actually a better way.&lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 15:17:12 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687546#M208731</guid>
      <dc:creator>SwissC</dc:creator>
      <dc:date>2020-09-29T15:17:12Z</dc:date>
    </item>
    <item>
      <title>Re: How to Sort Out Certain Duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687556#M208735</link>
      <description>&lt;P&gt;Yes! Thank you. Now, I can easily sort out based off these flags.&lt;/P&gt;</description>
      <pubDate>Tue, 29 Sep 2020 15:40:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-to-Sort-Out-Certain-Duplicates/m-p/687556#M208735</guid>
      <dc:creator>dpachorek</dc:creator>
      <dc:date>2020-09-29T15:40:55Z</dc:date>
    </item>
  </channel>
</rss>

