<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Question about checking and cleaning data in New SAS User</title>
    <link>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836556#M36079</link>
    <description>&lt;P&gt;Cleaning by creating those data sets, in my opinion, isn't particularly helpful.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I would start with something like&lt;/P&gt;
&lt;PRE&gt;Proc freq data=have;
   tables id*state*score / list;
run;&lt;/PRE&gt;
&lt;P&gt;Which will give counts of the same combinations and show the differences near each other.&lt;/P&gt;
&lt;P&gt;Actually and output data set could be made with the counts and filtered to only those where the count indicates a problem.&lt;/P&gt;
&lt;P&gt;Or consider REPORTS instead of data sets&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;Proc tabulate data=have;
   class id state ;
   table id,
           state
           /misstext=' '
  ;
run;&lt;/PRE&gt;</description>
    <pubDate>Mon, 03 Oct 2022 16:20:42 GMT</pubDate>
    <dc:creator>ballardw</dc:creator>
    <dc:date>2022-10-03T16:20:42Z</dc:date>
    <item>
      <title>Question about checking and cleaning data</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836555#M36078</link>
      <description>&lt;P&gt;Closed&lt;/P&gt;</description>
      <pubDate>Mon, 03 Oct 2022 18:40:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836555#M36078</guid>
      <dc:creator>hellorc</dc:creator>
      <dc:date>2022-10-03T18:40:25Z</dc:date>
    </item>
    <item>
      <title>Re: Question about checking and cleaning data</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836556#M36079</link>
      <description>&lt;P&gt;Cleaning by creating those data sets, in my opinion, isn't particularly helpful.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I would start with something like&lt;/P&gt;
&lt;PRE&gt;Proc freq data=have;
   tables id*state*score / list;
run;&lt;/PRE&gt;
&lt;P&gt;Which will give counts of the same combinations and show the differences near each other.&lt;/P&gt;
&lt;P&gt;Actually and output data set could be made with the counts and filtered to only those where the count indicates a problem.&lt;/P&gt;
&lt;P&gt;Or consider REPORTS instead of data sets&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;Proc tabulate data=have;
   class id state ;
   table id,
           state
           /misstext=' '
  ;
run;&lt;/PRE&gt;</description>
      <pubDate>Mon, 03 Oct 2022 16:20:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836556#M36079</guid>
      <dc:creator>ballardw</dc:creator>
      <dc:date>2022-10-03T16:20:42Z</dc:date>
    </item>
    <item>
      <title>Re: Question about checking and cleaning data</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836567#M36081</link>
      <description>&lt;P&gt;Try this code:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;data have;&lt;BR /&gt;input ID state $ city $ score @@;&lt;BR /&gt;datalines;&lt;BR /&gt;1 A A 100&lt;BR /&gt;1 A B 100&lt;BR /&gt;1 A C 101&lt;BR /&gt;1 B D 102&lt;BR /&gt;2 B E 99&lt;BR /&gt;2 B F 99&lt;BR /&gt;2 B G 99&lt;BR /&gt;3 A C 88&lt;BR /&gt;4 C H 120&lt;BR /&gt;4 D J 110&lt;BR /&gt;4 E H 111&lt;BR /&gt;4 E I 121&lt;BR /&gt;;&lt;BR /&gt;run;&lt;BR /&gt;&lt;BR /&gt;data tmp_want_state;&lt;BR /&gt;set have;&lt;BR /&gt;by id state city;&lt;BR /&gt;retain state_num;&lt;BR /&gt;if first.id then state_num = 1;&lt;BR /&gt;else state_num = state_num + 1;&lt;BR /&gt;state_id  = compress('State'|| state_num);&lt;BR /&gt;run;&lt;BR /&gt;&lt;BR /&gt;proc transpose data=tmp_want_state out=tmp_want;&lt;BR /&gt;    by ID;&lt;BR /&gt;    id state_id;&lt;BR /&gt;    var state;&lt;BR /&gt;run;&lt;BR /&gt;&lt;BR /&gt;proc sql;&lt;BR /&gt;create table want1 as&lt;BR /&gt;select &lt;BR /&gt;	a.*, b.same_state &lt;BR /&gt;from tmp_want a &lt;BR /&gt;left join &lt;BR /&gt;(select &lt;BR /&gt;	ID, count(distinct state) as num_distinct_states,&lt;BR /&gt;	case when count(distinct state) = 1 then 'yes'&lt;BR /&gt;	else 'no'&lt;BR /&gt;	end as same_state&lt;BR /&gt;from have&lt;BR /&gt;group by ID)b&lt;BR /&gt;on a.ID = b.ID;&lt;BR /&gt;&lt;BR /&gt;create table want2 as&lt;BR /&gt;select &lt;BR /&gt;	a.*, b.same_score &lt;BR /&gt;from tmp_want a &lt;BR /&gt;left join &lt;BR /&gt;(select &lt;BR /&gt;	ID, count(distinct score) as num_distinct_scores,&lt;BR /&gt;	case when count(distinct score) = 1 then 'yes'&lt;BR /&gt;	else 'no'&lt;BR /&gt;	end as same_score&lt;BR /&gt;from have&lt;BR /&gt;group by ID)b&lt;BR /&gt;on a.ID = b.ID;&lt;BR /&gt;&lt;BR /&gt;quit;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Oct 2022 17:17:42 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836567#M36081</guid>
      <dc:creator>SubbuPaz</dc:creator>
      <dc:date>2022-10-03T17:17:42Z</dc:date>
    </item>
    <item>
      <title>Re: Question about checking and cleaning data</title>
      <link>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836570#M36082</link>
      <description>&lt;P&gt;One way to approach this would be to count the number of unique values for STATE for each subject.&amp;nbsp;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;With a data step, you could do this using BY-group processing, like:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;data have;
input ID state $ city $ score @@;
datalines;
1 A A 100
1 A B 100
1 A C 101
1 B D 102
2 B E 99
2 B F 99
2 B G 99
3 A C 88
4 C H 120
4 D J 110
4 E H 111
4 E I 121
;
run;

data want (keep=id statecount);
  set have (keep=id state);
  by id state ;

  if first.id then statecount=0 ;      *If this is a new ID, set a counter variable to 0 ;
  if first.state then statecount++1 ;  *If this is a new state, increment the counter ;
  if last.id ;                         *If this is the last record for an ID, use a subsetting IF to select it ;

  put (id statecount)(=) ;
run ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 03 Oct 2022 17:37:24 GMT</pubDate>
      <guid>https://communities.sas.com/t5/New-SAS-User/Question-about-checking-and-cleaning-data/m-p/836570#M36082</guid>
      <dc:creator>Quentin</dc:creator>
      <dc:date>2022-10-03T17:37:24Z</dc:date>
    </item>
  </channel>
</rss>

