<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Removing duplicate rows with case sensitive values in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867366#M342564</link>
    <description>&lt;P&gt;Something like this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
  /* Fixed-column read: ID starts at column 1, FirstName at column 5, LastName at column 12. */
  input @1 ID $3. @5 FirstName $8. @12 LastName $8.;
  /* Convert both names to proper case so rows that differ only by letter case become identical. */
  FirstName = propcase(FirstName);
  LastName = propcase(LastName);
datalines;
001 John    Reed
001 JOHN    REED
002 Mitchel James
003 Steph   Ania
004 King    Mon
004 KING    MON
005 Valery  Short
012 ALMA    JACOBS
012 Alma    Jacobs
017 Josh    Ryan
017 Jash    Ryan
;
run;

/* Sort Have into Want; NODUPKEY keeps one row per distinct ID, LastName, FirstName combination. */
/* The two 017 rows differ in spelling (Josh, Jash), so both are kept.                           */
proc sort data = Have 
          out  = Want nodupkey;
  by ID LastName FirstName;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Fri, 31 Mar 2023 01:03:04 GMT</pubDate>
    <dc:creator>SASKiwi</dc:creator>
    <dc:date>2023-03-31T01:03:04Z</dc:date>
    <item>
      <title>Removing duplicate rows with case sensitive values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867364#M342562</link>
      <description>&lt;P&gt;I have a dataset with duplicate names, yet same IDs. It looks like this. I received this dataset as is, so not sure why the names are case sensitive.&amp;nbsp;&lt;/P&gt;&lt;TABLE border="1"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;FirstName&lt;/TD&gt;&lt;TD&gt;LastName&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;John&lt;/TD&gt;&lt;TD&gt;Reed&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;JOHN&lt;/TD&gt;&lt;TD&gt;REED&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;Mitchel&lt;/TD&gt;&lt;TD&gt;James&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;003&lt;/TD&gt;&lt;TD&gt;Steph&lt;/TD&gt;&lt;TD&gt;Ania&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;004&lt;/TD&gt;&lt;TD&gt;King&lt;/TD&gt;&lt;TD&gt;Mon&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;004&lt;/TD&gt;&lt;TD&gt;KING&lt;/TD&gt;&lt;TD&gt;MON&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;005&lt;/TD&gt;&lt;TD&gt;Valery&lt;/TD&gt;&lt;TD&gt;Short&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;012&lt;/TD&gt;&lt;TD&gt;ALMA&lt;/TD&gt;&lt;TD&gt;JACOBS&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;012&lt;/TD&gt;&lt;TD&gt;Alma&lt;/TD&gt;&lt;TD&gt;Jacobs&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;017&lt;/TD&gt;&lt;TD&gt;Josh&lt;/TD&gt;&lt;TD&gt;Ryan&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;017&lt;/TD&gt;&lt;TD&gt;Jash&lt;/TD&gt;&lt;TD&gt;Ryan&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;I need to identify those with the exact same spelling for firstname and lastname variables to remove the duplicates. I want the final dataset to look like this table below. However, given that I have around 800 rows and the names are duplicates due to being case sensitive (except for ID 017, which is a duplicate due to a typo), I'm unsure of how to approach this. 
Does anyone have any suggestions and/or solutions?&lt;/P&gt;&lt;TABLE border="1"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD&gt;ID&lt;/TD&gt;&lt;TD&gt;FirstName&lt;/TD&gt;&lt;TD&gt;LastName&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;001&lt;/TD&gt;&lt;TD&gt;John&lt;/TD&gt;&lt;TD&gt;Reed&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;002&lt;/TD&gt;&lt;TD&gt;Mitchel&lt;/TD&gt;&lt;TD&gt;James&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;003&lt;/TD&gt;&lt;TD&gt;Steph&lt;/TD&gt;&lt;TD&gt;Ania&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;004&lt;/TD&gt;&lt;TD&gt;King&lt;/TD&gt;&lt;TD&gt;Mon&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;&lt;P&gt;005&lt;/P&gt;&lt;/TD&gt;&lt;TD&gt;Valery&lt;/TD&gt;&lt;TD&gt;Short&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;012&lt;/TD&gt;&lt;TD&gt;Alma&lt;/TD&gt;&lt;TD&gt;Jacobs&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;017&lt;/TD&gt;&lt;TD&gt;Josh&amp;nbsp;&lt;/TD&gt;&lt;TD&gt;Ryan&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;017&lt;/TD&gt;&lt;TD&gt;Jash&lt;/TD&gt;&lt;TD&gt;Ryan&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;</description>
      <pubDate>Fri, 31 Mar 2023 00:12:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867364#M342562</guid>
      <dc:creator>amamiche67</dc:creator>
      <dc:date>2023-03-31T00:12:54Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with case sensitive values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867366#M342564</link>
      <description>&lt;P&gt;Something like this?&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data Have;
  /* Fixed-column read: ID starts at column 1, FirstName at column 5, LastName at column 12. */
  input @1 ID $3. @5 FirstName $8. @12 LastName $8.;
  /* Convert both names to proper case so rows that differ only by letter case become identical. */
  FirstName = propcase(FirstName);
  LastName = propcase(LastName);
datalines;
001 John    Reed
001 JOHN    REED
002 Mitchel James
003 Steph   Ania
004 King    Mon
004 KING    MON
005 Valery  Short
012 ALMA    JACOBS
012 Alma    Jacobs
017 Josh    Ryan
017 Jash    Ryan
;
run;

/* Sort Have into Want; NODUPKEY keeps one row per distinct ID, LastName, FirstName combination. */
/* The two 017 rows differ in spelling (Josh, Jash), so both are kept.                           */
proc sort data = Have 
          out  = Want nodupkey;
  by ID LastName FirstName;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 31 Mar 2023 01:03:04 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867366#M342564</guid>
      <dc:creator>SASKiwi</dc:creator>
      <dc:date>2023-03-31T01:03:04Z</dc:date>
    </item>
    <item>
      <title>Re: Removing duplicate rows with case sensitive values</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867367#M342565</link>
      <description>&lt;P&gt;SASKiwi's idea will work if you don't need to retain the first case in the data. Here is what I came up with. This uses a hash object so will only scale to the size of the available memory on your system.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
  /* ID is declared as character so that leading zeros such as 001 are preserved. */
  length ID $3 FirstName LastName $8;
  input ID FirstName LastName;
cards;
001 John Reed
001 JOHN REED
002 Mitchel James
003 Steph Ania
004 King Mon
004 KING MON
005 Valery Short
012 ALMA JACOBS
012 Alma Jacobs
017 Josh Ryan
017 Jash Ryan
;
run;

/* Keep the first row seen for each combination of ID and case-insensitive name, */
/* leaving the spelling and case of the kept row exactly as it was read.         */
data want;
    /* Create the hash of already-seen keys once, on the first iteration only. */
    if _n_ = 1 then do;
        declare hash h();
        rc = h.defineKey('ID','FirstName_upcase','LastName_upcase');
        rc = h.defineDone();
    end;

    set have;

    /* Upper-cased copies are used only as comparison keys; KEEP drops them from the output. */
    FirstName_upcase = upcase(FirstName);
    LastName_upcase = upcase(LastName);

    /* CHECK only tests whether the key exists; a non-zero return code means it is new. */
    if h.check() ne 0 then do;
        rc = h.add();
        output;
    end;

  keep ID FirstName LastName;

run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;This will keep the first observation with unique spelling and retain the case. If the variable was "JoHn" in the first observation the propcase method would make this "John".&lt;/P&gt;</description>
      <pubDate>Fri, 31 Mar 2023 01:17:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Removing-duplicate-rows-with-case-sensitive-values/m-p/867367#M342565</guid>
      <dc:creator>SimonDawson</dc:creator>
      <dc:date>2023-03-31T01:17:27Z</dc:date>
    </item>
  </channel>
</rss>

