hi ,
how to do outliers capping at 95th percentile in this case?
Variable | N | N Miss | Mean | Std Dev | Minimum | 1st Pctl | 5th Ptcl | 10th Pctl | 25th Pctl | 50th Pctl | 75th Pctl | 90th Pctl | 95th Pctl | 99th Pctl | Maximum |
BALANCE | 8950 | 0 | 1564.47 | 2081.53 | 0 | 0.064811 | 8.80954 | 23.544808 | 128.25396 | 873.385231 | 2054.37 | 4338.58 | 5911.51 | 9342.16 | 19043.14 |
BALANCE_FREQUENCY | 8950 | 0 | 0.8772707 | 0.236904 | 0 | 0.090909 | 0.272727 | 0.454545 | 0.888889 | 1 | 1 | 1 | 1 | 1 | 1 |
PURCHASES | 8950 | 0 | 1003.2 | 2136.63 | 0 | 0 | 0 | 0 | 39.58 | 361.28 | 1110.17 | 2542.68 | 3999.92 | 9007.67 | 49039.57 |
ONEOFF_PURCHASES | 8950 | 0 | 592.4373709 | 1659.89 | 0 | 0 | 0 | 0 | 0 | 38 | 577.83 | 1600.5 | 2675 | 6701.08 | 40761.25 |
INSTALLMENTS_PURCHASES | 8950 | 0 | 411.0676447 | 904.3381152 | 0 | 0 | 0 | 0 | 0 | 89 | 468.65 | 1140.35 | 1753.08 | 3887 | 22500 |
CASH_ADVANCE | 8950 | 0 | 978.8711125 | 2097.16 | 0 | 0 | 0 | 0 | 0 | 0 | 1113.87 | 3069.79 | 4653.69 | 9596.32 | 47137.21 |
PURCHASES_FREQUENCY | 8950 | 0 | 0.4903505 | 0.4013707 | 0 | 0 | 0 | 0 | 0.083333 | 0.5 | 0.916667 | 1 | 1 | 1 | 1 |
ONEOFF_PURCHASES_FREQUENCY | 8950 | 0 | 0.2024577 | 0.2983361 | 0 | 0 | 0 | 0 | 0 | 0.083333 | 0.3 | 0.75 | 1 | 1 | 1 |
PURCHASES_INSTALLMENTS_FREQUENCY | 8950 | 0 | 0.3644373 | 0.3974478 | 0 | 0 | 0 | 0 | 0 | 0.166667 | 0.75 | 1 | 1 | 1 | 1 |
CASH_ADVANCE_FREQUENCY | 8950 | 0 | 0.1351442 | 0.2001214 | 0 | 0 | 0 | 0 | 0 | 0 | 0.222222 | 0.416667 | 0.583333 | 0.833333 | 1.5 |
CASH_ADVANCE_TRX | 8950 | 0 | 3.2488268 | 6.8246467 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 10 | 15 | 29 | 123 |
PURCHASES_TRX | 8950 | 0 | 14.7098324 | 24.8576491 | 0 | 0 | 0 | 0 | 1 | 7 | 17 | 37 | 57 | 117 | 358 |
CREDIT_LIMIT | 8949 | 1 | 4494.45 | 3638.82 | 50 | 500 | 1000 | 1200 | 1600 | 3000 | 6500 | 9500 | 12000 | 17000 | 30000 |
PAYMENTS | 8950 | 0 | 1733.14 | 2895.06 | 0 | 0 | 89.921689 | 179.508581 | 383.273938 | 856.901546 | 1901.28 | 3925.29 | 6083.43 | 13652.75 | 50721.48 |
MINIMUM_PAYMENTS | 8637 | 313 | 864.2065423 | 2372.45 | 0.019163 | 19.492021 | 73.203221 | 106.028116 | 169.123707 | 312.343947 | 825.485459 | 1781.1 | 2767.05 | 9170.43 | 76406.21 |
PRC_FULL_PAYMENT | 8950 | 0 | 0.1537146 | 0.2924992 | 0 | 0 | 0 | 0 | 0 | 0 | 0.142857 | 0.6833335 | 1 | 1 | 1 |
TENURE | 8950 | 0 | 11.5173184 | 1.3383308 | 6 | 6 | 8 | 10 | 12 | 12 | 12 | 12 | 12 | 12 | 12 |
Ratio_Balance_Limit | 8949 | 1 | 0.3889264 | 0.3897223 | 0 | 0.000020637 | 0.0029404 | 0.0077911 | 0.0415267 | 0.3028696 | 0.7175819 | 0.9199151 | 0.9667606 | 1.0571536 | 15.9099511 |
payment_by_min | 8637 | 313 | 9.3500701 | 120.2869146 | 0.000730423 | 0.0553129 | 0.454491 | 0.609355 | 0.9575493 | 2.1704953 | 6.2604302 | 13.3353681 | 21.4431139 | 50.6009237 | 6840.53 |
Average_Purchase | 8950 | 0 | 73.8706831 | 160.5249391 | 0 | 0 | 0 | 0 | 11.98125 | 41.3939286 | 78.6744828 | 147.639 | 228.5714286 | 763.71 | 5981.67 |
Average_Cash_advance | 8950 | 0 | 208.9051168 | 535.6555169 | 0 | 0 | 0 | 0 | 0 | 0 | 247.04197 | 561.180268 | 926.7585355 | 1909.68 | 14836.45 |
I do not know whether there is problem on my system or have you pasted everything inappropriately.
Data is flowing outside the intended place holder.
hi,
how to do outlier capping at 95th percentile?
this is a code
data cluster;
set Y.Cd;
if BALANCE > 5911.51 then BALANCE = 5911.51 ;
if PURCHASES > 3999.92 then PURCHASES = 3999.92 ;
if ONEOFF_PURCHASES > 2675 then ONEOFF_PURCHASES = 2675 ;
if INSTALLMENTS_PURCHASES > 1753.08 then INSTALLMENTS_PURCHASES = 1753.08 ;
if CASH_ADVANCE > 4653.69 then CASH_ADVANCE = 4653.69 ;
if CASH_ADVANCE_TRX > 15 then CASH_ADVANCE_TRX = 15 ;
if PURCHASES_TRX > 57 then PURCHASES_TRX = 57 ;
if CREDIT_LIMIT > 12000 then CREDIT_LIMIT = 12000 ;
if PAYMENTS > 6083.43 then PAYMENTS = 6083.43 ;
if CASH_ADVANCE_FREQUENCY > 0.58333 then CASH_ADVANCE_FREQUENCY = 0.58333 ;
if MINIMUM_PAYMENTS > 2767.05 then MINIMUM_PAYMENTS =2767.05;
if Payment_by_min > 21.44311 then Payment_by_min =21.44311;
if Average_Purchase > 228.571 then Average_Purchase = 228.571;
if Average_Cash_Advance > 1365.19 then Average_Cash_Advance= 926.7585;
run;
Variable | N | N Miss | Mean | Std Dev | Minimum | 1st Pctl | 5th Ptcl | 10th Pctl | 25th Pctl | 50th Pctl | 75th Pctl | 90th Pctl | 95th Pctl | 99th Pctl | Maximum |
BALANCE | 8950 | 0 | 1564.47 | 2081.53 | 0 | 0.06481 | 8.80954 | 23.544808 | 128.25396 | 873.385231 | 2054.37 | 4338.58 | 5911.51 | 9342.16 | 19043.14 |
BALANCE_FREQUENCY | 8950 | 0 | 0.8772707 | 0.236904 | 0 | 0.09091 | 0.272727 | 0.454545 | 0.888889 | 1 | 1 | 1 | 1 | 1 | 1 |
PURCHASES | 8950 | 0 | 1003.2 | 2136.63 | 0 | 0 | 0 | 0 | 39.58 | 361.28 | 1110.17 | 2542.68 | 3999.92 | 9007.67 | 49039.57 |
ONEOFF_PURCHASES | 8950 | 0 | 592.4373709 | 1659.89 | 0 | 0 | 0 | 0 | 0 | 38 | 577.83 | 1600.5 | 2675 | 6701.08 | 40761.25 |
INSTALLMENTS_PURCHASES | 8950 | 0 | 411.0676447 | 904.3381 | 0 | 0 | 0 | 0 | 0 | 89 | 468.65 | 1140.35 | 1753.08 | 3887 | 22500 |
CASH_ADVANCE | 8950 | 0 | 978.8711125 | 2097.16 | 0 | 0 | 0 | 0 | 0 | 0 | 1113.87 | 3069.79 | 4653.69 | 9596.32 | 47137.21 |
PURCHASES_FREQUENCY | 8950 | 0 | 0.4903505 | 0.401371 | 0 | 0 | 0 | 0 | 0.083333 | 0.5 | 0.916667 | 1 | 1 | 1 | 1 |
ONEOFF_PURCHASES_FREQUENCY | 8950 | 0 | 0.2024577 | 0.298336 | 0 | 0 | 0 | 0 | 0 | 0.083333 | 0.3 | 0.75 | 1 | 1 | 1 |
PURCHASES_INSTALLMENTS_FREQUENCY | 8950 | 0 | 0.3644373 | 0.397448 | 0 | 0 | 0 | 0 | 0 | 0.166667 | 0.75 | 1 | 1 | 1 | 1 |
CASH_ADVANCE_FREQUENCY | 8950 | 0 | 0.1351442 | 0.200121 | 0 | 0 | 0 | 0 | 0 | 0 | 0.222222 | 0.416667 | 0.583333 | 0.833333 | 1.5 |
CASH_ADVANCE_TRX | 8950 | 0 | 3.2488268 | 6.824647 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 10 | 15 | 29 | 123 |
PURCHASES_TRX | 8950 | 0 | 14.7098324 | 24.85765 | 0 | 0 | 0 | 0 | 1 | 7 | 17 | 37 | 57 | 117 | 358 |
CREDIT_LIMIT | 8949 | 1 | 4494.45 | 3638.82 | 50 | 500 | 1000 | 1200 | 1600 | 3000 | 6500 | 9500 | 12000 | 17000 | 30000 |
PAYMENTS | 8950 | 0 | 1733.14 | 2895.06 | 0 | 0 | 89.921689 | 179.508581 | 383.273938 | 856.901546 | 1901.28 | 3925.29 | 6083.43 | 13652.75 | 50721.48 |
MINIMUM_PAYMENTS | 8637 | 313 | 864.2065423 | 2372.45 | 0.02 | 19.492 | 73.203221 | 106.028116 | 169.123707 | 312.343947 | 825.485459 | 1781.1 | 2767.05 | 9170.43 | 76406.21 |
PRC_FULL_PAYMENT | 8950 | 0 | 0.1537146 | 0.292499 | 0 | 0 | 0 | 0 | 0 | 0 | 0.142857 | 0.6833335 | 1 | 1 | 1 |
TENURE | 8950 | 0 | 11.5173184 | 1.338331 | 6 | 6 | 8 | 10 | 12 | 12 | 12 | 12 | 12 | 12 | 12 |
Ratio_Balance_Limit | 8949 | 1 | 0.3889264 | 0.389722 | 0 | 2.1E-05 | 0.0029404 | 0.0077911 | 0.0415267 | 0.3028696 | 0.7175819 | 0.9199151 | 0.9667606 | 1.0571536 | 15.9099511 |
payment_by_min | 8637 | 313 | 9.3500701 | 120.2869 | 0 | 0.05531 | 0.454491 | 0.609355 | 0.9575493 | 2.1704953 | 6.2604302 | 13.3353681 | 21.4431139 | 50.6009237 | 6840.53 |
Average_Purchase | 8950 | 0 | 73.8706831 | 160.5249 | 0 | 0 | 0 | 0 | 11.98125 | 41.3939286 | 78.6744828 | 147.639 | 228.5714286 | 763.71 | 5981.67 |
Average_Cash_advance | 8950 | 0 | 208.9051168 | 535.6555 | 0 | 0 | 0 | 0 | 0 | 0 | 247.04197 | 561.180268 | 926.7585355 | 1909.68 | 14836.45 |
So you only want replace any value above 95th percentile with 95th percentile.
%let low=0.05 ;
%let high=0.95 ;
proc iml;
use have;
read all var _num_ into x[c=vname];
close have;
call qntl(q,x,{&low ,&high});
do i=1 to ncol(x);
*x[loc(x[,i]<q[1,i]),i]=q[1,i];
x[loc(x[,i]>q[2,i]),i]=q[2,i];
end;
create want from x[c=vname];
append from x;
close want;
quit;
What kind of outpu do you want ? you want use 95th percentile to replace any value greater than 95th percentile?
Or you want winsorize data ? Here is IML code:
data have;
do i=1 to 100;
a=ceil(ranuni(1)*100);
b=ceil(ranuni(2)*100);
output;
end;
drop i;
run;
%let low=0.05 ;
%let high=0.95 ;
proc iml;
use have;
read all var _num_ into x[c=vname];
close have;
call qntl(q,x,{&low ,&high});
do i=1 to ncol(x);
x[loc(x[,i]<q[1,i]),i]=q[1,i];
x[loc(x[,i]>q[2,i]),i]=q[2,i];
end;
create want from x[c=vname];
append from x;
close want;
quit;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.