Hello, I'm trying to make banks clients segmentation and I failed in making some plots. Not working properly code is bold. Please help me. preview of standrazied dataset distribution of RMSE - wrong Also not correct Super wrong cluster plot proc stdize data=dane out=Stand method=std;
var Duration Age Credit_amount;
run;
/* k-means*/
proc fastclus data=Stand out=Clust
maxclusters=5 maxiter=100;
var Duration Age Credit_amount;
run;
proc freq data=Clust;
tables Purpose*Cluster;
run;
/*Definition of a macro program called multseed, which takes the number of initial measures as the argument. It allows to find several cluster solutions with different starting points*/
Title "Multseed macro for finding multiple
cluster solutions with different starting seeds";
%macro multseed(n);
%do i = 1 %to &n;
proc surveyselect data=Clust out=seeds n=5
seed=&i noprint;
run;
proc fastclus data=Clust maxc=5 converge=0
drift seeds=seeds outstat=out least=2
noprint mean=meanout;
var Duration Age Credit_amount;
run;
data out;
set out (where=(_type_="WITHIN_STD"));
seed=&i;
run;
data ans;
set out
%if &i>1 %then %do;
ans
%end;
;
keep seed over_all Duration Age Credit_amount;
run;
%end;
%mend;
/*Using multseed with n=100*/
%multseed(100);
/*Comparative analysis of results for 100 segmentation models from different starting seeds*/
proc univariate data=ans plot;
var OVER_ALL;
id seed;
run;
/* it's not working */
/*Building a histogram of RMSE values for 100 different solutions of the K-means method at different starting points*/
Title "Distribution of RMSE";
proc sgplot data=ans;
histogram over_all / scale=count binwidth=.01;
xaxis display=(nolabel);
run;
/*Building a scatter plot of estimated cluster centers for 100 different solutions of the K-means method at various start points*/
Title "Estimated cluster centers of 100 different K-means solutions";
proc sgplot data=ans;
scatter x=Duration y=Credit_amount;
run;
/*Best solution out of 100 seeds*/
Title "Best solution out of 100 seeds";
proc surveyselect data=Stand out=seeds n=5 seed=66;
run;
proc fastclus data=Stand maxc=5 converge=0 drift seed=seeds
outstat=out least=2 mean=meanout out=asgn;
var Duration Age Credit_amount;
run;
/* THIS IS NOT WORKING PROPERLY */
/*Creating a chart showing average clusters and assignments to them as part of the best solution*/
Title "Plot with the best means and cluster assignments";
data clusout;
set asgn meanout
(keep=Duration Age Credit_amount);
length clus $6.;
if cluster=. then clus="Center";
else clus=put(cluster, $1.);
label clus="Cluster";
run;
data myanno;
drawspace='datavalue'; function='line';
x1=-.1; y1=3.5; x2=4.5; y2=3.5; output;
x1=4.5; y1=4.5; x2=7.1; y2=4.5; output;
run;
proc sgplot data=clusout sganno=myanno;
scatter x=Duration y=Credit_amount / group=clus;
refline 4.5 / axis=x;
lineparm x=2.5 y=0 slope=-1;
run;
... View more