<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How do i remove outliers from a dataset? in SAS Programming</title>
    <link>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514400#M138723</link>
    <description>&lt;P&gt;Note that there is an alternative approach ... capping the outliers instead of removing them.&amp;nbsp; For example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE class=" language-sas"&gt;&lt;CODE class="  language-sas"&gt;   &lt;SPAN class="token keyword"&gt;if&lt;/SPAN&gt; &lt;SPAN class="token punctuation"&gt;(&lt;/SPAN&gt;sum_power &amp;gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;median&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;+&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1.5&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;*&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;iqr&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;)&lt;/SPAN&gt; then sum_power = &amp;amp;&lt;SPAN class="token function"&gt;median&lt;/SPAN&gt; + &lt;SPAN class="token number"&gt;1.5&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;*&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;iqr&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt; &lt;/CODE&gt;&lt;/PRE&gt;</description>
    <pubDate>Mon, 19 Nov 2018 11:43:54 GMT</pubDate>
    <dc:creator>Astounding</dc:creator>
    <dc:date>2018-11-19T11:43:54Z</dc:date>
    <item>
      <title>How do i remove outliers from a dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514340#M138696</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a large dataset and I want to remove outliers. Before I proceed further, I've done some research online and below is my code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;proc sql noprint;
	create table ppussu_sum as
	select substationname
	, substationcode
	, voltagetype
	, substationtype
	, datetime
	, sum(power_calculated) as sum_power
	from ncpdm.ncp_loadpf_&amp;amp;srcnm._&amp;amp;period
	where (substationtype = 'PPU' and feedertype = 'TXF')
		or (substationtype = 'SSU' and feedertype = 'OF')
	group by substationname, substationcode, voltagetype, substationtype, datetime
	;
quit;



proc univariate data = ppussu_sum;
var sum_power;
output out=boxStats median=median qrange = iqr;
run; 

data _null_;
	set boxStats;
	call symput ('median',median);
	call symput ('iqr', iqr);
run; 

%put &amp;amp;median;
%put &amp;amp;iqr;

data trimmed;
set ppussu_sum;
    if (sum_power le &amp;amp;median + 1.5 * &amp;amp;iqr) and (sum_power ge &amp;amp;median - 1.5 * &amp;amp;iqr); 
run; &lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As you can all see, I first use proc univariate and then remove outliers based on the formula. I am just wondering whether this is actually the correct way of doing it, or whether there is a more accurate/appropriate way.&lt;/P&gt;</description>
      <pubDate>Mon, 19 Nov 2018 05:31:28 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514340#M138696</guid>
      <dc:creator>WorkingMan</dc:creator>
      <dc:date>2018-11-19T05:31:28Z</dc:date>
    </item>
    <item>
      <title>Re: How do i remove outliers from a dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514344#M138699</link>
      <description>&lt;P&gt;What do you want to do with the data set without outliers? If you want to compute some statistic or run some regression, the appropriate SAS procedure usually has options to robustly deal with outliers directly.&lt;/P&gt;</description>
      <pubDate>Mon, 19 Nov 2018 06:17:37 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514344#M138699</guid>
      <dc:creator>PeterClemmensen</dc:creator>
      <dc:date>2018-11-19T06:17:37Z</dc:date>
    </item>
    <item>
      <title>Re: How do i remove outliers from a dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514347#M138702</link>
      <description>&lt;P&gt;Without outliers, the data won't have sudden spikes when displayed in a visual dashboard. This is the only reason why I want to remove them.&lt;/P&gt;</description>
      <pubDate>Mon, 19 Nov 2018 06:26:47 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514347#M138702</guid>
      <dc:creator>WorkingMan</dc:creator>
      <dc:date>2018-11-19T06:26:47Z</dc:date>
    </item>
    <item>
      <title>Re: How do i remove outliers from a dataset?</title>
      <link>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514400#M138723</link>
      <description>&lt;P&gt;Note that there is an alternative approach ... capping the outliers instead of removing them.&amp;nbsp; For example:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE class=" language-sas"&gt;&lt;CODE class="  language-sas"&gt;   &lt;SPAN class="token keyword"&gt;if&lt;/SPAN&gt; &lt;SPAN class="token punctuation"&gt;(&lt;/SPAN&gt;sum_power &amp;gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;median&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;+&lt;/SPAN&gt; &lt;SPAN class="token number"&gt;1.5&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;*&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;iqr&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;)&lt;/SPAN&gt; then sum_power = &amp;amp;&lt;SPAN class="token function"&gt;median&lt;/SPAN&gt; + &lt;SPAN class="token number"&gt;1.5&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;*&lt;/SPAN&gt; &lt;SPAN class="token operator"&gt;&amp;amp;&lt;/SPAN&gt;&lt;SPAN class="token function"&gt;iqr&lt;/SPAN&gt;&lt;SPAN class="token punctuation"&gt;;&lt;/SPAN&gt; &lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 19 Nov 2018 11:43:54 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Programming/How-do-i-remove-outliers-from-a-dataset/m-p/514400#M138723</guid>
      <dc:creator>Astounding</dc:creator>
      <dc:date>2018-11-19T11:43:54Z</dc:date>
    </item>
  </channel>
</rss>

