<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic replacing extreme values with missing in SAS Procedures</title>
    <link>https://communities.sas.com/t5/SAS-Procedures/replacing-extreme-values-with-missing/m-p/568191#M75156</link>
    <description>&lt;P&gt;I have a data set with both continuous and categorical variables. I need to find extreme values and replace them with missing values for the continuous variables. I've gotten this far:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Calculate Quartiles (P25, P75) and IQR */
PROC UNIVARIATE DATA = kddcup98 NOPRINT;
VAR DemAge
DemMedHomeValue
DemMedIncome
DemPctVeterans
GiftAvg36
GiftAvgAll
GiftAvgCard36
GiftAvgLast
GiftCnt36
GiftCntAll
GiftCntCard36
GiftCntCardAll
GiftTimeFirst
GiftTimeLast
PromCnt12
PromCnt36
PromCntAll
PromCntCard12
PromCntCard36
PromCntCardAll
TARGET_D;
OUTPUT OUT = boxStats p25 = p25 p75 = p75 QRANGE = iqr;
RUN;

DATA _null_;
SET boxStats;
CALL symput ('p25',p25);
CALL symput ('p75',p75);
CALL symput ('iqr', iqr);
RUN;

%PUT &amp;amp;p25;
%PUT &amp;amp;p75;
%PUT &amp;amp;iqr;

DATA trimmed;
SET kddcup98;
ARRAY change _numeric_;
DO OVER change;
IF (change &amp;gt; &amp;amp;p75 + 1.5 * &amp;amp;iqr) OR (change &amp;lt; &amp;amp;p25 - 1.5 * &amp;amp;iqr) THEN change = .;
END;
RUN;

/* List Variables with Missing Values */
PROC MEANS DATA=trimmed NMISS N;
TITLE 'trimmed Variables with Number of Missing Values (NMISS) and Number of Numeric Values (N)';
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The only problem is that it miscalculates the number of extreme values. In some cases, it considers most of the values as extreme.&lt;/P&gt;</description>
    <pubDate>Sun, 23 Jun 2019 09:10:55 GMT</pubDate>
    <dc:creator>axescot78</dc:creator>
    <dc:date>2019-06-23T09:10:55Z</dc:date>
    <item>
      <title>replacing extreme values with missing</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/replacing-extreme-values-with-missing/m-p/568191#M75156</link>
      <description>&lt;P&gt;I have a data set with both continuous and categorical variables. I need to find extreme values and replace them with missing values for the continuous variables. I've gotten this far:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;CODE class=" language-sas"&gt;/* Calculate Quartiles (P25, P75) and IQR */
PROC UNIVARIATE DATA = kddcup98 NOPRINT;
VAR DemAge
DemMedHomeValue
DemMedIncome
DemPctVeterans
GiftAvg36
GiftAvgAll
GiftAvgCard36
GiftAvgLast
GiftCnt36
GiftCntAll
GiftCntCard36
GiftCntCardAll
GiftTimeFirst
GiftTimeLast
PromCnt12
PromCnt36
PromCntAll
PromCntCard12
PromCntCard36
PromCntCardAll
TARGET_D;
OUTPUT OUT = boxStats p25 = p25 p75 = p75 QRANGE = iqr;
RUN;

DATA _null_;
SET boxStats;
CALL symput ('p25',p25);
CALL symput ('p75',p75);
CALL symput ('iqr', iqr);
RUN;

%PUT &amp;amp;p25;
%PUT &amp;amp;p75;
%PUT &amp;amp;iqr;

DATA trimmed;
SET kddcup98;
ARRAY change _numeric_;
DO OVER change;
IF (change &amp;gt; &amp;amp;p75 + 1.5 * &amp;amp;iqr) OR (change &amp;lt; &amp;amp;p25 - 1.5 * &amp;amp;iqr) THEN change = .;
END;
RUN;

/* List Variables with Missing Values */
PROC MEANS DATA=trimmed NMISS N;
TITLE 'trimmed Variables with Number of Missing Values (NMISS) and Number of Numeric Values (N)';
RUN;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The only problem is that it miscalculates the number of extreme values. In some cases, it considers most of the values as extreme.&lt;/P&gt;</description>
      <pubDate>Sun, 23 Jun 2019 09:10:55 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/replacing-extreme-values-with-missing/m-p/568191#M75156</guid>
      <dc:creator>axescot78</dc:creator>
      <dc:date>2019-06-23T09:10:55Z</dc:date>
    </item>
    <item>
      <title>Re: replacing extreme values with missing</title>
      <link>https://communities.sas.com/t5/SAS-Procedures/replacing-extreme-values-with-missing/m-p/568346#M75160</link>
      <description>&lt;P&gt;Are you trying to trim or Winsorize each variable? If so, please read &lt;A href="https://blogs.sas.com/content/iml/2017/02/08/winsorization-good-bad-and-ugly.html" target="_self"&gt;"Winsorization: The good, the bad, and the ugly,"&lt;/A&gt; which discusses the statistical implications of getting rid of extreme values. If you decide to proceed and Winsorize your data, the article also contains links to a second article about how to Winsorize, and you can easily modify it to replace extreme values with missing values.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;If you only want the trimmed or Winsorized means and StdDev,&amp;nbsp;y&lt;SPAN&gt;ou can use the ROBUSTSCALE option, the TRIMMED= option, and the WINSORIZED= option to obtain robust estimates without modifying the original data.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 24 Jun 2019 12:53:25 GMT</pubDate>
      <guid>https://communities.sas.com/t5/SAS-Procedures/replacing-extreme-values-with-missing/m-p/568346#M75160</guid>
      <dc:creator>Rick_SAS</dc:creator>
      <dc:date>2019-06-24T12:53:25Z</dc:date>
    </item>
  </channel>
</rss>

