<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Issue with duplicate unique identifiers and multiple variables in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573388#M12490</link>
    <description>&lt;P&gt;You can use PROC SUMMARY (also known as MEANS) to find the MAX of a number of variables.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc summary nway data=have ;
  by id ;
  var var1 var2 var3 ;
  output out=want max=;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Sun, 14 Jul 2019 18:47:19 GMT</pubDate>
    <dc:creator>Tom</dc:creator>
    <dc:date>2019-07-14T18:47:19Z</dc:date>
    <item>
      <title>Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572969#M12396</link>
      <description>&lt;P&gt;I’m working with a national databank with tens of thousands of observations, and I'm having issues with duplicate unique identifiers and the variables I want to keep.Â&amp;nbsp;I came across the example below just by looking at a proc print of limited observations, but I have tens of thousands of observations since this is a national databank, so itâ€™s not really feasible to look through all of them. Iâ€™d like to combine TBI = 1 and abuse = 1 in the same observation. How do I look for these discrepancies without combing through them by hand? Or at least a way to narrow down the observations I have to look at so I can then edit them by hand? I'm using SAS 9.4.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screen Shot 2019-07-11 at 6.15.26 PM.png" style="width: 389px;"&gt;&lt;img src="https://communities.sas.com/t5/image/serverpage/image-id/31004i470A7D73432B7078/image-size/large?v=v2&amp;amp;px=999" role="button" title="Screen Shot 2019-07-11 at 6.15.26 PM.png" alt="Screen Shot 2019-07-11 at 6.15.26 PM.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jul 2019 23:19:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572969#M12396</guid>
      <dc:creator>avak</dc:creator>
      <dc:date>2019-07-11T23:19:24Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572973#M12397</link>
      <description>&lt;P&gt;Totally missed your problem. One more go. So, you want to find duplicate rows that also have TBI =1 and Abuse = 1?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
	input INC_KEY TBI Abuse;
datalines;
12345 0 1 &lt;BR /&gt;12346 0 1 &lt;BR /&gt;12346 1 1 &lt;BR /&gt;12347 0 1 &lt;BR /&gt;12347 1 0 &lt;BR /&gt;;;;
run;

proc sql;
	create table want as &lt;BR /&gt;	select *, count(INC_KEY) as duplicates 
	from have 
	group by INC_KEY
	having duplicates &amp;gt; 1 and TBI = 1 and abuse = 1;	
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;NB! This will only give you the rows where all of these things are true. So, this will only give you one row.&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;INC_KEY TBI Abuse duplicates&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;12346&amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;You can also do a subquery:&lt;/CODE&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql;
	create table want as 
	select *, count(INC_KEY) as duplicates 
	from have 
	where INC_KEY in (select distinct INC_KEY from have where TBI = 1 and abuse = 1)
	group by INC_KEY
	having duplicates &amp;gt; 1;	
quit;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;CODE class=" language-sas"&gt;&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;INC_KEY TBI Abuse duplicates&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;12346&amp;nbsp; &amp;nbsp;0&amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&lt;CODE class=" language-sas"&gt;12346&amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp;1&amp;nbsp; &amp;nbsp; &amp;nbsp;2&lt;/CODE&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 12 Jul 2019 00:35:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572973#M12397</guid>
      <dc:creator>heffo</dc:creator>
      <dc:date>2019-07-12T00:35:48Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572974#M12398</link>
      <description>&lt;P&gt;So your variables are 0/1?&lt;/P&gt;
&lt;P&gt;Just take the MAX value.&lt;/P&gt;</description>
      <pubDate>Fri, 12 Jul 2019 00:11:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572974#M12398</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-07-12T00:11:47Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572986#M12402</link>
      <description>&lt;P&gt;You have a few ways to handle this type of issue, first is to understand why it's happening - usually reasons are things recorded over time so sometimes it's best to just take the latest version of the records.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Here's one method I think works as well&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data want;
update have(obs=0) have;
by INC_KEY;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;PS. In the future please paste your data as text, when you paste it as a picture we have to type it out to test any solutions. So this solution is untested.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 12 Jul 2019 02:12:43 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/572986#M12402</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2019-07-12T02:12:43Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573381#M12487</link>
      <description>&lt;P&gt;Yes, they are yes/no variables. 1 = yes, 0 = no. How do I select the max values?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 14 Jul 2019 16:00:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573381#M12487</guid>
      <dc:creator>avak</dc:creator>
      <dc:date>2019-07-14T16:00:58Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573382#M12488</link>
      <description>&lt;P&gt;It's happening because I'm merging several databases from the National Trauma Data Bank, and I'm creating variables off of ICD-9 codes. For every ICD-9 code, they list it under duplicate unique identifiers (the inc_key variable). For example, one observation will include a certain ICD-9 code, but the next one might not, even though both observations have the same inc_key. So I'm struggling to get rid of the duplicates without accidentally deleting important data.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Sorry about not including text, will do that next time! Thanks!&lt;/P&gt;</description>
      <pubDate>Sun, 14 Jul 2019 15:58:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573382#M12488</guid>
      <dc:creator>avak</dc:creator>
      <dc:date>2019-07-14T15:58:28Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573388#M12490</link>
      <description>&lt;P&gt;You can use PROC SUMMARY (also known as MEANS) to find the MAX of a number of variables.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc summary nway data=have ;
  by id ;
  var var1 var2 var3 ;
  output out=want max=;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Sun, 14 Jul 2019 18:47:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573388#M12490</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-07-14T18:47:19Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573712#M12553</link>
      <description>&lt;P&gt;My variables are only 0/1 (yes/no). If I have duplicate unique identifiers (inc_key) but differing variables, is there a way to delete the duplicates and set the variables to the max value for any of the given observations of that individual inc_key?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 15 Jul 2019 22:41:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573712#M12553</guid>
      <dc:creator>avak</dc:creator>
      <dc:date>2019-07-15T22:41:55Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573713#M12554</link>
      <description>&lt;P&gt;Isn't that what the code I just posted is doing?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;You will need to adjust the code to match your dataset and variable names.&amp;nbsp; Since you didn't post any sample data there was no way for me to post exact code.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 15 Jul 2019 22:57:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573713#M12554</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2019-07-15T22:57:42Z</dc:date>
    </item>
    <item>
      <title>Re: Issue with duplicate unique identifiers and multiple variables</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573717#M12555</link>
      <description>&lt;P&gt;Yes! Sorry, I didn't run the code correctly. Thank you so much!&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 16 Jul 2019 00:11:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Issue-with-duplicate-unique-identifiers-and-multiple-variables/m-p/573717#M12555</guid>
      <dc:creator>avak</dc:creator>
      <dc:date>2019-07-16T00:11:25Z</dc:date>
    </item>
  </channel>
</rss>

