<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Calculate percent missing in long dataset in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500005#M133086</link>
    <description>&lt;P&gt;Here's a way to do it with SQL, you could probably do it in fewer steps, but I prefer to break things up to keep them simple.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Create example data */
data source ;
	do study_id = 1 to 5 ;
		do time = 1 to 100 ;
			if ranuni(2)&amp;gt;0.05 then
				hr=int(ranuni(0)*10)+90 ;
			else
				hr=. ;
			output ;
		end ;
	end ;
run ;

proc sql ;
	/* Count the missing records by study_id */
	create table missing as
	select study_id, count(*) as missingCnt 
	from source
	where
		hr=.
	group by study_id 
	;
	/* Count all the records by study_id */
	create table all as
	select study_id, count(*) as total
	from source
	group by study_id 
	;
	/* join the 2 tables and calculate percentage by study_id */
	create table percent as 
	select 
		a.study_id,
		m.missingCnt,
		a.total,
		m.missingCnt/a.total as percent
	from
		missing as m, all as a
	where
		m.study_id=a.study_id 
	;
	/* join the percent table to the source data where the percent is &amp;gt; 0.05 (5%) */
	create table results as
	select 
		s.*,
		p.percent
	from
		source as s,
		percent as p
	where
		s.study_id=p.study_id and
		p.percent&amp;lt;0.05 
	;

	/* Check the results */
	proc sql ;
		select 
			unique(study_id)
		from
			results ;


quit ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Fri, 28 Sep 2018 18:49:30 GMT</pubDate>
    <dc:creator>AMSAS</dc:creator>
    <dc:date>2018-09-28T18:49:30Z</dc:date>
    <item>
      <title>Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/499995#M133083</link>
      <description>&lt;P&gt;I have a long dataset with HR data and time in seconds. Some participants have up to 8000 observations of heart rate. At times, the HR did not get collected because there was something wrong with the monitor. I need to identify how much missing data each participant has and if a participant has more than 5% missing heart rate values across time, I will exclude them. I know how to find missing data overall, but I am not sure sure how to calculate the % missing for a long data set by participant. Attached is data for 2 people that gives the structure of my data set. Any suggestion/ help would be greatly appreciated.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 28 Sep 2018 18:21:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/499995#M133083</guid>
      <dc:creator>rfarmenta</dc:creator>
      <dc:date>2018-09-28T18:21:19Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500001#M133084</link>
      <description>&lt;P&gt;PROC FREQ with the MISSING option.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;*create sample data to work with;

data class;
	set sashelp.class;

	if age=14 then
		call missing(height, weight, sex);

	if name='Alfred' then
		call missing(sex, age, height);
	label age="Fancy Age Label";
run;

*create format for missing;

proc format ;
	value $ missfmt ' '="Missing" other="Not Missing";
	value nmissfmt .="Missing" other="Not Missing";
run;

*Proc freq to count missing/non missing;
ods table onewayfreqs=temp;

proc freq data=class;
	table _all_ / missing;
	format _numeric_ nmissfmt. _character_ $missfmt.;
run;

*Format output;

data long;
	length variable $32. variable_value $50.;
	set temp;
	Variable=scan(table, 2);
	Variable_Value=strip(trim(vvaluex(variable)));
	presentation=catt(frequency, " (", trim(put(percent/100, percent7.1)), ")");
	keep variable variable_value frequency percent cum: presentation;
	label variable='Variable' variable_value='Variable Value';
run;

proc sort data=long;
	by variable;
run;

proc transpose data=long out=want;
	by variable;
	id variable_value;
	var percent;
run;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 28 Sep 2018 18:39:19 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500001#M133084</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-09-28T18:39:19Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500003#M133085</link>
      <description>&lt;P&gt;Actually since you only have one variable proc means is easier. Use it to get NMISS and N (with STACKODS) and then do the calculation in a data step.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 28 Sep 2018 18:45:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500003#M133085</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-09-28T18:45:54Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500005#M133086</link>
      <description>&lt;P&gt;Here's a way to do it with SQL, you could probably do it in fewer steps, but I prefer to break things up to keep them simple.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Create example data */
data source ;
	do study_id = 1 to 5 ;
		do time = 1 to 100 ;
			if ranuni(2)&amp;gt;0.05 then
				hr=int(ranuni(0)*10)+90 ;
			else
				hr=. ;
			output ;
		end ;
	end ;
run ;

proc sql ;
	/* Count the missing records by study_id */
	create table missing as
	select study_id, count(*) as missingCnt 
	from source
	where
		hr=.
	group by study_id 
	;
	/* Count all the records by study_id */
	create table all as
	select study_id, count(*) as total
	from source
	group by study_id 
	;
	/* join the 2 tables and calculate percentage by study_id */
	create table percent as 
	select 
		a.study_id,
		m.missingCnt,
		a.total,
		m.missingCnt/a.total as percent
	from
		missing as m, all as a
	where
		m.study_id=a.study_id 
	;
	/* join the percent table to the source data where the percent is &amp;gt; 0.05 (5%) */
	create table results as
	select 
		s.*,
		p.percent
	from
		source as s,
		percent as p
	where
		s.study_id=p.study_id and
		p.percent&amp;lt;0.05 
	;

	/* Check the results */
	proc sql ;
		select 
			unique(study_id)
		from
			results ;


quit ;&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Fri, 28 Sep 2018 18:49:30 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500005#M133086</guid>
      <dc:creator>AMSAS</dc:creator>
      <dc:date>2018-09-28T18:49:30Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500008#M133087</link>
      <description>&lt;P&gt;While you have got workable solutions, would be nice to include the expected output for the sample you provided&lt;/P&gt;</description>
      <pubDate>Fri, 28 Sep 2018 18:54:40 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500008#M133087</guid>
      <dc:creator>novinosrin</dc:creator>
      <dc:date>2018-09-28T18:54:40Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500011#M133089</link>
      <description>&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;*generate counts of N/NMISS;
proc means data=have stackods n nmiss NWAY;
class subjid;
var hr;
ods output summary=summary_stats;
run;

*calculate percent missing;
data percent_missing;
set summary_stats;
length exclude_flag $4.;
Pct_Missing = nmiss/sum(n, nmiss);
if pct_missing&amp;gt;0.05 then exclude_flag='Yes';
else exclude_flag='No';
run; 

*filtered original input removing those will more than 5% missing;
proc sql;
create table filtered as
select *
from have where subjid not in (select subjid from percent_missing where exclude_flag = 'Yes');
quit;
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://communities.sas.com/t5/user/viewprofilepage/user-id/297"&gt;@rfarmenta&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;
&lt;P&gt;I have a long dataset with HR data and time in seconds. Some participants have up to 8000 observations of heart rate. At times, the HR did not get collected because there was something wrong with the monitor. I need to identify how much missing data each participant has and if a participant has more than 5% missing heart rate values across time, I will exclude them. I know how to find missing data overall, but I am not sure sure how to calculate the % missing for a long data set by participant. Attached is data for 2 people that gives the structure of my data set. Any suggestion/ help would be greatly appreciated.&amp;nbsp;&lt;/P&gt;
&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;
&lt;P&gt;&amp;nbsp;EDIT: corrected if percent&amp;gt;5 to if percent&amp;gt;0.05 since percents are between 0 and 1 with this calculation.&lt;/P&gt;</description>
      <pubDate>Fri, 28 Sep 2018 19:17:27 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500011#M133089</guid>
      <dc:creator>Reeza</dc:creator>
      <dc:date>2018-09-28T19:17:27Z</dc:date>
    </item>
    <item>
      <title>Re: Calculate percent missing in long dataset</title>
      <link>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500048#M133109</link>
      <description>&lt;P&gt;This solution worked very well. Thank you!&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 28 Sep 2018 21:32:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/Calculate-percent-missing-in-long-dataset/m-p/500048#M133109</guid>
      <dc:creator>rfarmenta</dc:creator>
      <dc:date>2018-09-28T21:32:55Z</dc:date>
    </item>
  </channel>
</rss>

