Hi SAS Experts -
I have a macro that calculates acceptable range of a variable. An acceptable range is defined by :
Lower Limit = Q1 - 1.5*(Q3-Q1)
Upper Limit = Q3 + 1.5*(Q3-Q1)
It's a boxplot method of calculating outliers. The macro is working fine. But it is inefficient in terms of its processing as it calculates outliers for each variable in a loop and then capping values. I want proc univariate to be run for all the variables (not in loop) and save output in a dataset and then capping for variables using IF THEN at one time only.
Code : -
options mprint symbolgen;
%macro outliers(input=, vars=, output= );
data &output;
set &input;
run;
%let n=%sysfunc(countw(&vars));
%do i= 1 %to &n;
%let val = %scan(&vars,&i);
/* Calculate the quartiles and inter-quartile range using proc univariate */
proc univariate data=&output noprint;
var &val;
output out=temp QRANGE= IQR Q1= First_Qtl Q3= Third_Qtl;
run;
/* Extract the upper and lower limits into macro variables */
data _null_;
set temp;
call symput('QR', IQR);
call symput('Q1', First_Qtl);
call symput('Q3', Third_Qtl);
run;
%let ULimit=%sysevalf(&Q3 + 1.5 * &QR);
%let LLimit=%sysevalf(&Q1 - 1.5 * &QR);
/* Final dataset excluding outliers*/
data &output;
set &output;
if &val < &Llimit then &val = &Llimit;
if &val > &Ulimit then &val = &Ulimit;
run;
%end;
%mend;
%outliers(Input=abcd, Vars = a, output= test);
o
Thanks in anticipation!
Hi.
I guest you are looking for something like this:
%macro outliers(input=, vars=, output= );
%let nvars=%sysfunc(countw(&vars));
%do i= 1 %to &nvars;
%let val = %scan(&vars,&i);
/* Calculate the quartiles and inter-quartile range using proc univariate */
proc univariate data=&input noprint;
var &vars;
* get qr, q1, q3 for each vars;
output out=&output QRANGE= %do j=1 %to &nvars; IQR_&j %end;
Q1= %do j=1 %to &nvars; First_Qtl_&j %end;
Q3= %do j=1 %to &nvars; Third_Qtl_&j %end;
;
run;
/* Extract the upper and lower limits into macro variables */
data _null_;
set &output;
* get lower/upper limits for each vars;
%do j=1 %to &nvars;
call symput("Llimit_&j",put(First_Qtl_&j+1.5*IQR_&j,best.));
call symput("Ulimit_&j",put(Third_Qtl_&j+1.5*IQR_&j,best.));
%end;
run;
/* Final dataset excluding outliers*/
data &output;
set &input;
* adjust limits for each vars;
%do j=1 %to &nvars;
%let var=%scan(&vars,&j,%str( ));
%put Var=&var Llimit=&&Llimit_&j Ulimit=&&Ulimit_&j;
if &var < &&Llimit_&j then &var = &&Llimit_&j;
if &var > &&Ulimit_&j then &var = &&Ulimit_&j;
%end;
run;
%end;
* get rid of temporaries;
proc datasets lib=work nolist;
delete _:;
quit;
%mend;
%outliers(Input=abcd, Vars = a b, output= test);
Hope it helps.
Daniel Santos @ www.cgd.pt
Don't miss out on SAS Innovate - Register now for the FREE Livestream!
Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.