<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: finding duplicates in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23374#M5158</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You could use SQL.&amp;nbsp; For example you could decide to pick the smallest id for each email address.&lt;/P&gt;&lt;P&gt;(NOTE: are you sure that the different schools are using different ranges of id values?&amp;nbsp; Otherwise you might have the same id for two different instructors.)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE __default_attr="plain" __jive_macro_name="code" class="jive_text_macro jive_macro_code"&gt;&lt;P&gt;proc sql ;&lt;/P&gt;&lt;P&gt;&amp;nbsp; create table uniqueid as &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; select distinct email, min(tid) as tid &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; from mutlipleid&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; group by email&lt;/P&gt;&lt;P&gt;&amp;nbsp; ;&lt;/P&gt;&lt;P&gt;quit;&lt;/P&gt;&lt;/PRE&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Tue, 03 Jan 2012 19:20:23 GMT</pubDate>
    <dc:creator>Tom</dc:creator>
    <dc:date>2012-01-03T19:20:23Z</dc:date>
    <item>
      <title>finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23373#M5157</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;i have data as seen below.&amp;nbsp; Instructors have a unique email address but may have multiple IDs if they serve more than one school.&amp;nbsp; The restriction on the file I'm submitting is only one ID per instructor.&amp;nbsp; I want to find all cases such as &lt;A href="mailto:hsmith@yahoo.com"&gt;hsmith@yahoo.com&lt;/A&gt; and give him the same ID in all obs.&amp;nbsp; (It doesn't matter which one as long as he only has one.)&amp;nbsp; Right now, I'm running a PROC FREQ with a TABLE statement of email*tchid/list and doing a visual inspection (not exactly an elegant solution.)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;email&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; tchid&lt;/P&gt;&lt;P&gt;&lt;A href="mailto:jdoe@aol.com"&gt;jdoe@aol.com&lt;/A&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 01&lt;/P&gt;&lt;P&gt;&lt;A href="mailto:sjones@hotmail.com"&gt;sjones@hotmail.com&lt;/A&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 02&lt;/P&gt;&lt;P&gt;&lt;A href="mailto:hsmith@yahoo.com"&gt;hsmith@yahoo.com&lt;/A&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 03&lt;/P&gt;&lt;P&gt;&lt;A href="mailto:hsmith@yahoo.com"&gt;hsmith@yahoo.com&lt;/A&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 04&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:15:20 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23373#M5157</guid>
      <dc:creator>GreggB</dc:creator>
      <dc:date>2012-01-03T19:15:20Z</dc:date>
    </item>
    <item>
      <title>Re: finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23374#M5158</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You could use SQL.&amp;nbsp; For example you could decide to pick the smallest id for each email address.&lt;/P&gt;&lt;P&gt;(NOTE: are you sure that the different schools are using different ranges of id values?&amp;nbsp; Otherwise you might have the same id for two different instructors.)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE __default_attr="plain" __jive_macro_name="code" class="jive_text_macro jive_macro_code"&gt;&lt;P&gt;proc sql ;&lt;/P&gt;&lt;P&gt;&amp;nbsp; create table uniqueid as &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; select distinct email, min(tid) as tid &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; from mutlipleid&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp; group by email&lt;/P&gt;&lt;P&gt;&amp;nbsp; ;&lt;/P&gt;&lt;P&gt;quit;&lt;/P&gt;&lt;/PRE&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:20:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23374#M5158</guid>
      <dc:creator>Tom</dc:creator>
      <dc:date>2012-01-03T19:20:23Z</dc:date>
    </item>
    <item>
      <title>Re: finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23375#M5159</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;or, if you prefer a datastep solution:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data have;&lt;/P&gt;&lt;P&gt;&amp;nbsp; informat email $40.;&lt;/P&gt;&lt;P&gt;&amp;nbsp; input email tchid $;&lt;/P&gt;&lt;P&gt;&amp;nbsp; cards;&lt;/P&gt;&lt;P&gt;&lt;A class="jive-link-email-small" href="mailto:jdoe@aol.com"&gt;jdoe@aol.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 01&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A class="jive-link-email-small" href="mailto:sjones@hotmail.com"&gt;sjones@hotmail.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 02&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A class="jive-link-email-small" href="mailto:hsmith@yahoo.com"&gt;hsmith@yahoo.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 03&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A class="jive-link-email-small" href="mailto:hsmith@yahoo.com"&gt;hsmith@yahoo.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 04&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sort data=have (drop=tchid) out=want;&lt;/P&gt;&lt;P&gt;&amp;nbsp; by email;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data want (drop=temp:);&lt;/P&gt;&lt;P&gt;&amp;nbsp; set want;&lt;/P&gt;&lt;P&gt;&amp;nbsp; by email;&lt;/P&gt;&lt;P&gt;&amp;nbsp; if first.email then tempid+1;&lt;/P&gt;&lt;P&gt;&amp;nbsp; tchid=put(tempid,z2.);&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;That could, of course, be modified to retain some of the existing ids.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:23:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23375#M5159</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2012-01-03T19:23:02Z</dc:date>
    </item>
    <item>
      <title>finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23376#M5160</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;This would assign new IDs, wouldn't it?&amp;nbsp; I have to use the ones that are already in the student information system.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:26:44 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23376#M5160</guid>
      <dc:creator>GreggB</dc:creator>
      <dc:date>2012-01-03T19:26:44Z</dc:date>
    </item>
    <item>
      <title>finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23377#M5161</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Schools are using different ranges.&amp;nbsp; They are assigned at a central location to ensure there's no overlap.&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:27:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23377#M5161</guid>
      <dc:creator>GreggB</dc:creator>
      <dc:date>2012-01-03T19:27:52Z</dc:date>
    </item>
    <item>
      <title>finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23378#M5162</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Easy to accommodate:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;data have;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; informat email $40.;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; input email tchid $;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; cards;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;A class="jive-link-email-small" href="mailto:jdoe@aol.com" style="outline-style: none; padding-top: 1px; padding-bottom: 1px; padding-left: 17px; zoom: 1; color: #0e66ba; background-position: no-repeat no-repeat;"&gt;jdoe@aol.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 01&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;A class="jive-link-email-small" href="mailto:sjones@hotmail.com" style="outline-style: none; padding-top: 1px; padding-bottom: 1px; padding-left: 17px; zoom: 1; color: #0e66ba; background-position: no-repeat no-repeat;"&gt;sjones@hotmail.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 02&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;A class="jive-link-email-small" href="mailto:hsmith@yahoo.com" style="outline-style: none; padding-top: 1px; padding-bottom: 1px; padding-left: 17px; zoom: 1; color: #0e66ba; background-position: no-repeat 
no-repeat;"&gt;hsmith@yahoo.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 03&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;A class="jive-link-email-small" href="mailto:hsmith@yahoo.com" style="outline-style: none; padding-top: 1px; padding-bottom: 1px; padding-left: 17px; zoom: 1; color: #0e66ba; background-position: no-repeat no-repeat;"&gt;hsmith@yahoo.com&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; 04&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;proc sort data=have out=want;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; by email;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;data want (drop=hold:);&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; set want;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; by email;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; retain holdid;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; if first.email then holdid=tchid;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;&amp;nbsp; 
else tchid=holdid;&lt;/P&gt;&lt;P style="background-color: #ffffff; font-size: 12px; font-family: Arial, Helvetica, sans-serif;"&gt;run;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:32:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23378#M5162</guid>
      <dc:creator>art297</dc:creator>
      <dc:date>2012-01-03T19:32:19Z</dc:date>
    </item>
    <item>
      <title>Re: finding duplicates</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23379#M5163</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You can get what you want by:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;proc sort data=have out=want nodupkey;&lt;/P&gt;&lt;P&gt;by email;&lt;/P&gt;&lt;P&gt;run;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Linlin&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 03 Jan 2012 19:32:41 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/finding-duplicates/m-p/23379#M5163</guid>
      <dc:creator>Linlin</dc:creator>
      <dc:date>2012-01-03T19:32:41Z</dc:date>
    </item>
  </channel>
</rss>

