<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Finding discrepancies in multiple entries in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341495#M78182</link>
    <description>&lt;P&gt;Something that looks like this, but also a code that will count how many times by unique ID this has happened in the dataset.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;IMG src="https://communities.sas.com/t5/image/serverpage/image-id/7778iF2D2B66B0846B832/image-size/original?v=1.0&amp;amp;px=-1" border="0" alt="Gender.png" title="Gender.png" /&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 16 Mar 2017 09:29:02 GMT</pubDate>
    <dc:creator>byeh2017</dc:creator>
    <dc:date>2017-03-16T09:29:02Z</dc:date>
    <item>
      <title>Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341482#M78173</link>
      <description>&lt;P&gt;I'm doing some data cleaning on a dataset that includes dates, ID, and gender. For certain subsequent dates, the gender sometimes is miscoded. 1 is male and 2 is female. How do I find in the entire dataset all the IDs that are associated with this gender coding discrepancy? Thank you&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Sample data: one record per date and ID with a gender code (1 = male, 2 = female). */
data MYDATA.SAMPLEGENDER;
  /* dsd: comma-delimited values; truncover: a short line never pulls in the next line */
  infile datalines dsd truncover;
  /* the colon modifier reads each delimited field with the named informat;
     no FORMAT statement is given, so Date is shown as a raw SAS date number */
  input Date:MMDDYY10. ID:BEST. Gender:BEST.;
datalines4;
11/01/2016,1,1
11/01/2016,2,2
11/01/2016,3,1
11/02/2016,1,2
11/04/2016,5,2
11/03/2016,6,2
11/04/2016,3,2
11/04/2016,8,1
11/01/2016,9,2
11/01/2016,10,2
11/01/2016,11,1
11/01/2016,12,2
11/01/2016,13,1
11/01/2016,14,2
11/10/2016,14,1
11/11/2016,14,2
;;;;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 Mar 2017 08:59:23 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341482#M78173</guid>
      <dc:creator>byeh2017</dc:creator>
      <dc:date>2017-03-16T08:59:23Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341488#M78178</link>
      <description>&lt;P&gt;What exactly do you want the output to look like? &amp;nbsp;You can pull up discrepancies quite simply with a proc freq if you want to know how many of each type, or, if you just want a list of subjects and their coding, with a proc sort nodupkey by id and gender. &amp;nbsp;Or is the first record the right one, and should anything that differs from it be flagged? &amp;nbsp;You need to show what the output should look like.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2017 09:14:17 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341488#M78178</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2017-03-16T09:14:17Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341495#M78182</link>
      <description>&lt;P&gt;Something that looks like this, but also a code that will count how many times by unique ID this has happened in the dataset.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;IMG src="https://communities.sas.com/t5/image/serverpage/image-id/7778iF2D2B66B0846B832/image-size/original?v=1.0&amp;amp;px=-1" border="0" alt="Gender.png" title="Gender.png" /&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2017 09:29:02 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341495#M78182</guid>
      <dc:creator>byeh2017</dc:creator>
      <dc:date>2017-03-16T09:29:02Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341499#M78186</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Step 1: load the sample records (date, ID, gender code). */
data have;
  infile datalines dsd truncover;
  input Date:MMDDYY10. ID:BEST. Gender:BEST.;
  format date mmddyy10.;
datalines4;
11/01/2016,1,1
11/01/2016,2,2
11/01/2016,3,1
11/02/2016,1,2
11/04/2016,5,2
11/03/2016,6,2
11/04/2016,3,2
11/04/2016,8,1
11/01/2016,9,2
11/01/2016,10,2
11/01/2016,11,1
11/01/2016,12,2
11/01/2016,13,1
11/01/2016,14,2
11/10/2016,14,1
11/11/2016,14,2
;;;;
run;

/* Step 2: order each ID's records by date so that FIRST.ID is its earliest record. */
proc sort data=have;
by id date;
run;

/* Step 3: build LOOKUP with one row per ID whose gender code ever differs
   from the code on that ID's first record. */
data lookup (keep=id);
set have;
by id;
/* keep the reference gender and the mismatch flag across the rows of an ID */
retain checkgen flag;
if first.id
then do;
  checkgen = gender;
  flag = 0;
end;
if gender ne checkgen then flag = 1;
/* write each inconsistent ID exactly once, on its last record */
if last.id and flag then output;
run;

/* Step 4: keep only the records that belong to an inconsistent ID. */
data want;
merge have lookup (in=check);
by id;
if check;
run;

/* Step 5: list the affected records, grouped by ID. */
proc print data=want noobs;
by id;
run;

/* Step 6: count the IDs that have a discrepancy. */
proc sql;
select count(*) from lookup;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 Mar 2017 09:38:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341499#M78186</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2017-03-16T09:38:35Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341503#M78188</link>
      <description>&lt;P&gt;Well, for counts, proc freq is simple enough. &amp;nbsp;For your output below, sorry, it's not clear. &amp;nbsp;That just looks like a proc print of the data you have by ID? &amp;nbsp;What is the logic: do you take the first record as being correct, and then output any that don't match it? &amp;nbsp;If you just want an output of distinct id/gender pairs, then proc sort would work.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2017 09:48:35 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341503#M78188</guid>
      <dc:creator>RW9</dc:creator>
      <dc:date>2017-03-16T09:48:35Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341513#M78191</link>
      <description>&lt;P&gt;Thank you. I'm trying to apply it to my main dataset. I noticed that the proc print is printing everything associated with the dataset. What is the line to limit it only to dcdeathdate and the gender?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Prints mydata.ODgenderlook in BY groups of nationalid, without the Obs column.
   There is no VAR statement, so every variable in the dataset is listed. */
proc print data=mydata.ODgenderlook noobs;
by nationalid;
run;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2017 10:15:59 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341513#M78191</guid>
      <dc:creator>byeh2017</dc:creator>
      <dc:date>2017-03-16T10:15:59Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341522#M78195</link>
      <description>&lt;PRE&gt;

/* Load the sample records (date, ID, gender code). */
data have;
  infile datalines dsd truncover;
  input Date:MMDDYY10. ID:BEST. Gender:BEST.;
  format date mmddyy10.;
datalines4;
11/01/2016,1,1
11/01/2016,2,2
11/01/2016,3,1
11/02/2016,1,2
11/04/2016,5,2
11/03/2016,6,2
11/04/2016,3,2
11/04/2016,8,1
11/01/2016,9,2
11/01/2016,10,2
11/01/2016,11,1
11/01/2016,12,2
11/01/2016,13,1
11/01/2016,14,2
11/10/2016,14,1
11/11/2016,14,2
;;;;
run;
/* WANT: for every ID recorded with more than one distinct gender code,
   one row per (id, gender) with the number of records carrying that code. */
proc sql;
create table want as
 select id,gender,count(*) as count
  from (
/* The inner query keeps all rows of the IDs that pass HAVING (SAS remerges the
   per-ID summary). count(distinct) ignores missing values, so "gt 1" is used
   instead of "ne 1": an ID whose gender is always missing has a distinct count
   of 0 and must not be reported as a discrepancy. */
select * from have group by id having count(distinct gender) gt 1
)
group by id,gender;
quit;

&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 Mar 2017 10:42:52 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341522#M78195</guid>
      <dc:creator>Ksharp</dc:creator>
      <dc:date>2017-03-16T10:42:52Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341587#M78208</link>
      <description>&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/127989"&gt;@byeh2017&lt;/a&gt; wrote:&lt;BR /&gt;
&lt;P&gt;Thank you. I'm trying to apply it to my main dataset. I noticed that the proc print is printing everything associated with the dataset. What is the line to limit it only to dcdeathdate and the gender?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc print data=mydata.ODgenderlook noobs;
by nationalid;
run;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;Use the var statement. var is used in many procedures to select which variables are used in the procedure.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Print only dcdeathdate and gender, in BY groups of nationalid, without the Obs column. */
proc print noobs data=mydata.ODgenderlook;
  var dcdeathdate gender;
  by nationalid;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 16 Mar 2017 14:14:32 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/341587#M78208</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2017-03-16T14:14:32Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/346627#M79960</link>
      <description>&lt;P&gt;Is there a way I can do this that looks like this output? It is simply just a listing out of the entries that have gender discrepancies.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;IMG src="https://communities.sas.com/t5/image/serverpage/image-id/8118iD38F12DA464B5C7C/image-size/original?v=1.0&amp;amp;px=-1" border="0" alt="gendercheck.png" title="gendercheck.png" /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here's the sample dataset again. Thank you:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE class=" language-sas"&gt;&lt;CODE class="  language-sas"&gt;/* Sample data: one record per date and ID with a gender code.
   The first data line must start on the line after DATALINES4, and the
   four-semicolon terminator must sit on a line of its own. */
&lt;SPAN class="token procnames"&gt;data&lt;/SPAN&gt; MYDATA&lt;SPAN class="token punctuation"&gt;.&lt;/SPAN&gt;SAMPLEGENDER&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;
  &lt;SPAN class="token statement"&gt;infile&lt;/SPAN&gt; datalines dsd truncover&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;
  &lt;SPAN class="token keyword"&gt;input&lt;/SPAN&gt; &lt;SPAN class="token function"&gt;Date&lt;/SPAN&gt;:MMDDYY10&lt;SPAN class="token punctuation"&gt;.&lt;/SPAN&gt; &lt;SPAN class="token keyword"&gt;ID&lt;/SPAN&gt;:BEST&lt;SPAN class="token punctuation"&gt;.&lt;/SPAN&gt; Gender:BEST&lt;SPAN class="token punctuation"&gt;.&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;
&lt;SPAN class="token datalines"&gt;&lt;SPAN class="token keyword"&gt;datalines&lt;/SPAN&gt;&lt;SPAN class="token data string"&gt;4&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;
&lt;SPAN class="token data string"&gt;11/01/2016,1,1
11/01/2016,2,2
11/01/2016,3,1
11/02/2016,1,2
11/04/2016,5,2
11/03/2016,6,2
11/04/2016,3,2
11/04/2016,8,1
11/01/2016,9,2
11/01/2016,10,2
11/01/2016,11,1
11/01/2016,12,2
11/01/2016,13,1
11/01/2016,14,2
11/10/2016,14,1
11/11/2016,14,2
&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Apr 2017 11:18:58 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/346627#M79960</guid>
      <dc:creator>byeh2017</dc:creator>
      <dc:date>2017-04-03T11:18:58Z</dc:date>
    </item>
    <item>
      <title>Re: Finding discrepancies in multiple entries</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/347633#M80347</link>
      <description>&lt;P&gt;There is no entry for ID=2 and date=12/1/2016 in your sample dataset.&lt;/P&gt;
&lt;P&gt;This SQL finds multiple gender values per id:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* RESULT: every record of each ID that carries more than one distinct gender code. */
proc sql;
create table result as
select date, id, gender
/* the sample data in this thread is created as MYDATA.SAMPLEGENDER, so the
   libref is required; an unqualified name would be looked up in WORK */
from mydata.samplegender
where id in (
  /* IDs whose records disagree on gender; count(distinct) ignores missing values */
  select id
  from mydata.samplegender
  group by id
  having count(distinct gender) &amp;gt; 1
)
/* order by date within each ID so the listing is deterministic and chronological */
order by id, date
;
quit;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 06 Apr 2017 09:12:48 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Finding-discrepancies-in-multiple-entries/m-p/347633#M80347</guid>
      <dc:creator>Kurt_Bremser</dc:creator>
      <dc:date>2017-04-06T09:12:48Z</dc:date>
    </item>
  </channel>
</rss>

