hi ,
how to do outliers capping at 95th percentile in this case?
Variable | N | N Miss | Mean | Std Dev | Minimum | 1st Pctl | 5th Ptcl | 10th Pctl | 25th Pctl | 50th Pctl | 75th Pctl | 90th Pctl | 95th Pctl | 99th Pctl | Maximum |
BALANCE | 8950 | 0 | 1564.47 | 2081.53 | 0 | 0.064811 | 8.80954 | 23.544808 | 128.25396 | 873.385231 | 2054.37 | 4338.58 | 5911.51 | 9342.16 | 19043.14 |
BALANCE_FREQUENCY | 8950 | 0 | 0.8772707 | 0.236904 | 0 | 0.090909 | 0.272727 | 0.454545 | 0.888889 | 1 | 1 | 1 | 1 | 1 | 1 |
PURCHASES | 8950 | 0 | 1003.2 | 2136.63 | 0 | 0 | 0 | 0 | 39.58 | 361.28 | 1110.17 | 2542.68 | 3999.92 | 9007.67 | 49039.57 |
ONEOFF_PURCHASES | 8950 | 0 | 592.4373709 | 1659.89 | 0 | 0 | 0 | 0 | 0 | 38 | 577.83 | 1600.5 | 2675 | 6701.08 | 40761.25 |
INSTALLMENTS_PURCHASES | 8950 | 0 | 411.0676447 | 904.3381152 | 0 | 0 | 0 | 0 | 0 | 89 | 468.65 | 1140.35 | 1753.08 | 3887 | 22500 |
CASH_ADVANCE | 8950 | 0 | 978.8711125 | 2097.16 | 0 | 0 | 0 | 0 | 0 | 0 | 1113.87 | 3069.79 | 4653.69 | 9596.32 | 47137.21 |
PURCHASES_FREQUENCY | 8950 | 0 | 0.4903505 | 0.4013707 | 0 | 0 | 0 | 0 | 0.083333 | 0.5 | 0.916667 | 1 | 1 | 1 | 1 |
ONEOFF_PURCHASES_FREQUENCY | 8950 | 0 | 0.2024577 | 0.2983361 | 0 | 0 | 0 | 0 | 0 | 0.083333 | 0.3 | 0.75 | 1 | 1 | 1 |
PURCHASES_INSTALLMENTS_FREQUENCY | 8950 | 0 | 0.3644373 | 0.3974478 | 0 | 0 | 0 | 0 | 0 | 0.166667 | 0.75 | 1 | 1 | 1 | 1 |
CASH_ADVANCE_FREQUENCY | 8950 | 0 | 0.1351442 | 0.2001214 | 0 | 0 | 0 | 0 | 0 | 0 | 0.222222 | 0.416667 | 0.583333 | 0.833333 | 1.5 |
CASH_ADVANCE_TRX | 8950 | 0 | 3.2488268 | 6.8246467 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 10 | 15 | 29 | 123 |
PURCHASES_TRX | 8950 | 0 | 14.7098324 | 24.8576491 | 0 | 0 | 0 | 0 | 1 | 7 | 17 | 37 | 57 | 117 | 358 |
CREDIT_LIMIT | 8949 | 1 | 4494.45 | 3638.82 | 50 | 500 | 1000 | 1200 | 1600 | 3000 | 6500 | 9500 | 12000 | 17000 | 30000 |
PAYMENTS | 8950 | 0 | 1733.14 | 2895.06 | 0 | 0 | 89.921689 | 179.508581 | 383.273938 | 856.901546 | 1901.28 | 3925.29 | 6083.43 | 13652.75 | 50721.48 |
MINIMUM_PAYMENTS | 8637 | 313 | 864.2065423 | 2372.45 | 0.019163 | 19.492021 | 73.203221 | 106.028116 | 169.123707 | 312.343947 | 825.485459 | 1781.1 | 2767.05 | 9170.43 | 76406.21 |
PRC_FULL_PAYMENT | 8950 | 0 | 0.1537146 | 0.2924992 | 0 | 0 | 0 | 0 | 0 | 0 | 0.142857 | 0.6833335 | 1 | 1 | 1 |
TENURE | 8950 | 0 | 11.5173184 | 1.3383308 | 6 | 6 | 8 | 10 | 12 | 12 | 12 | 12 | 12 | 12 | 12 |
Ratio_Balance_Limit | 8949 | 1 | 0.3889264 | 0.3897223 | 0 | 0.000020637 | 0.0029404 | 0.0077911 | 0.0415267 | 0.3028696 | 0.7175819 | 0.9199151 | 0.9667606 | 1.0571536 | 15.9099511 |
payment_by_min | 8637 | 313 | 9.3500701 | 120.2869146 | 0.000730423 | 0.0553129 | 0.454491 | 0.609355 | 0.9575493 | 2.1704953 | 6.2604302 | 13.3353681 | 21.4431139 | 50.6009237 | 6840.53 |
Average_Purchase | 8950 | 0 | 73.8706831 | 160.5249391 | 0 | 0 | 0 | 0 | 11.98125 | 41.3939286 | 78.6744828 | 147.639 | 228.5714286 | 763.71 | 5981.67 |
Average_Cash_advance | 8950 | 0 | 208.9051168 | 535.6555169 | 0 | 0 | 0 | 0 | 0 | 0 | 247.04197 | 561.180268 | 926.7585355 | 1909.68 | 14836.45 |
I do not know whether there is problem on my system or have you pasted everything inappropriately.
Data is flowing outside the intended place holder.
hi,
how to do outlier capping at 95th percentile?
this is a code
data cluster;
set Y.Cd;
if BALANCE > 5911.51 then BALANCE = 5911.51 ;
if PURCHASES > 3999.92 then PURCHASES = 3999.92 ;
if ONEOFF_PURCHASES > 2675 then ONEOFF_PURCHASES = 2675 ;
if INSTALLMENTS_PURCHASES > 1753.08 then INSTALLMENTS_PURCHASES = 1753.08 ;
if CASH_ADVANCE > 4653.69 then CASH_ADVANCE = 4653.69 ;
if CASH_ADVANCE_TRX > 15 then CASH_ADVANCE_TRX = 15 ;
if PURCHASES_TRX > 57 then PURCHASES_TRX = 57 ;
if CREDIT_LIMIT > 12000 then CREDIT_LIMIT = 12000 ;
if PAYMENTS > 6083.43 then PAYMENTS = 6083.43 ;
if CASH_ADVANCE_FREQUENCY > 0.58333 then CASH_ADVANCE_FREQUENCY = 0.58333 ;
if MINIMUM_PAYMENTS > 2767.05 then MINIMUM_PAYMENTS =2767.05;
if Payment_by_min > 21.44311 then Payment_by_min =21.44311;
if Average_Purchase > 228.571 then Average_Purchase = 228.571;
if Average_Cash_Advance > 1365.19 then Average_Cash_Advance= 926.7585;
run;
Variable | N | N Miss | Mean | Std Dev | Minimum | 1st Pctl | 5th Ptcl | 10th Pctl | 25th Pctl | 50th Pctl | 75th Pctl | 90th Pctl | 95th Pctl | 99th Pctl | Maximum |
BALANCE | 8950 | 0 | 1564.47 | 2081.53 | 0 | 0.06481 | 8.80954 | 23.544808 | 128.25396 | 873.385231 | 2054.37 | 4338.58 | 5911.51 | 9342.16 | 19043.14 |
BALANCE_FREQUENCY | 8950 | 0 | 0.8772707 | 0.236904 | 0 | 0.09091 | 0.272727 | 0.454545 | 0.888889 | 1 | 1 | 1 | 1 | 1 | 1 |
PURCHASES | 8950 | 0 | 1003.2 | 2136.63 | 0 | 0 | 0 | 0 | 39.58 | 361.28 | 1110.17 | 2542.68 | 3999.92 | 9007.67 | 49039.57 |
ONEOFF_PURCHASES | 8950 | 0 | 592.4373709 | 1659.89 | 0 | 0 | 0 | 0 | 0 | 38 | 577.83 | 1600.5 | 2675 | 6701.08 | 40761.25 |
INSTALLMENTS_PURCHASES | 8950 | 0 | 411.0676447 | 904.3381 | 0 | 0 | 0 | 0 | 0 | 89 | 468.65 | 1140.35 | 1753.08 | 3887 | 22500 |
CASH_ADVANCE | 8950 | 0 | 978.8711125 | 2097.16 | 0 | 0 | 0 | 0 | 0 | 0 | 1113.87 | 3069.79 | 4653.69 | 9596.32 | 47137.21 |
PURCHASES_FREQUENCY | 8950 | 0 | 0.4903505 | 0.401371 | 0 | 0 | 0 | 0 | 0.083333 | 0.5 | 0.916667 | 1 | 1 | 1 | 1 |
ONEOFF_PURCHASES_FREQUENCY | 8950 | 0 | 0.2024577 | 0.298336 | 0 | 0 | 0 | 0 | 0 | 0.083333 | 0.3 | 0.75 | 1 | 1 | 1 |
PURCHASES_INSTALLMENTS_FREQUENCY | 8950 | 0 | 0.3644373 | 0.397448 | 0 | 0 | 0 | 0 | 0 | 0.166667 | 0.75 | 1 | 1 | 1 | 1 |
CASH_ADVANCE_FREQUENCY | 8950 | 0 | 0.1351442 | 0.200121 | 0 | 0 | 0 | 0 | 0 | 0 | 0.222222 | 0.416667 | 0.583333 | 0.833333 | 1.5 |
CASH_ADVANCE_TRX | 8950 | 0 | 3.2488268 | 6.824647 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 10 | 15 | 29 | 123 |
PURCHASES_TRX | 8950 | 0 | 14.7098324 | 24.85765 | 0 | 0 | 0 | 0 | 1 | 7 | 17 | 37 | 57 | 117 | 358 |
CREDIT_LIMIT | 8949 | 1 | 4494.45 | 3638.82 | 50 | 500 | 1000 | 1200 | 1600 | 3000 | 6500 | 9500 | 12000 | 17000 | 30000 |
PAYMENTS | 8950 | 0 | 1733.14 | 2895.06 | 0 | 0 | 89.921689 | 179.508581 | 383.273938 | 856.901546 | 1901.28 | 3925.29 | 6083.43 | 13652.75 | 50721.48 |
MINIMUM_PAYMENTS | 8637 | 313 | 864.2065423 | 2372.45 | 0.02 | 19.492 | 73.203221 | 106.028116 | 169.123707 | 312.343947 | 825.485459 | 1781.1 | 2767.05 | 9170.43 | 76406.21 |
PRC_FULL_PAYMENT | 8950 | 0 | 0.1537146 | 0.292499 | 0 | 0 | 0 | 0 | 0 | 0 | 0.142857 | 0.6833335 | 1 | 1 | 1 |
TENURE | 8950 | 0 | 11.5173184 | 1.338331 | 6 | 6 | 8 | 10 | 12 | 12 | 12 | 12 | 12 | 12 | 12 |
Ratio_Balance_Limit | 8949 | 1 | 0.3889264 | 0.389722 | 0 | 2.1E-05 | 0.0029404 | 0.0077911 | 0.0415267 | 0.3028696 | 0.7175819 | 0.9199151 | 0.9667606 | 1.0571536 | 15.9099511 |
payment_by_min | 8637 | 313 | 9.3500701 | 120.2869 | 0 | 0.05531 | 0.454491 | 0.609355 | 0.9575493 | 2.1704953 | 6.2604302 | 13.3353681 | 21.4431139 | 50.6009237 | 6840.53 |
Average_Purchase | 8950 | 0 | 73.8706831 | 160.5249 | 0 | 0 | 0 | 0 | 11.98125 | 41.3939286 | 78.6744828 | 147.639 | 228.5714286 | 763.71 | 5981.67 |
Average_Cash_advance | 8950 | 0 | 208.9051168 | 535.6555 | 0 | 0 | 0 | 0 | 0 | 0 | 247.04197 | 561.180268 | 926.7585355 | 1909.68 | 14836.45 |
So you only want replace any value above 95th percentile with 95th percentile.
%let low=0.05 ;
%let high=0.95 ;
proc iml;
use have;
read all var _num_ into x[c=vname];
close have;
call qntl(q,x,{&low ,&high});
do i=1 to ncol(x);
*x[loc(x[,i]<q[1,i]),i]=q[1,i];
x[loc(x[,i]>q[2,i]),i]=q[2,i];
end;
create want from x[c=vname];
append from x;
close want;
quit;
What kind of outpu do you want ? you want use 95th percentile to replace any value greater than 95th percentile?
Or you want winsorize data ? Here is IML code:
data have;
do i=1 to 100;
a=ceil(ranuni(1)*100);
b=ceil(ranuni(2)*100);
output;
end;
drop i;
run;
%let low=0.05 ;
%let high=0.95 ;
proc iml;
use have;
read all var _num_ into x[c=vname];
close have;
call qntl(q,x,{&low ,&high});
do i=1 to ncol(x);
x[loc(x[,i]<q[1,i]),i]=q[1,i];
x[loc(x[,i]>q[2,i]),i]=q[2,i];
end;
create want from x[c=vname];
append from x;
close want;
quit;
Join us for SAS Innovate April 16-19 at the Aria in Las Vegas. Bring the team and save big with our group pricing for a limited time only.
Pre-conference courses and tutorials are filling up fast and are always a sellout. Register today to reserve your seat.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.