/*2 place where input needs to be changed SIMULTANEOUSLY LINE16 & LINE17 FOLLOW THE GRID BELOW ON NAMING CONVENTIONS*/ /*FINAL1 GROUP WO 10 - CLUSTER1 */ /*FINAL2 GROUP WO 9 - CLUSTER2 */ /*FINAL3 GROUP WO 8 - CLUSTER3 */ /*FINAL4 GROUP WO 7 - CLUSTER4 */ /*FINAL5 GROUP WO 6 - CLUSTER5 */ /*FINAL6 GROUP WO 5 - CLUSTER6 */ /*FINAL7 GROUP WO 4 - CLUSTER7 */ /*FINAL8 GROUP WO 3 - CLUSTER8 */ /*FINAL9 GROUP WO 2 - CLUSTER9 */ /*FINAL10 GROUP WO 1 - CLUSTER10 */ /*>>>>>CHANGE HERE FOLLOW NM CONVEN ABOVE >>> */ %LET ClusterGrp = CLUSTER1; %LET Finalf_ = Final1; /*FINAL OUTPUT FILE. THIS NEED TO BE CHANGED ONLY ONCE BEFORE "RUN 3" is EXECUTED*/ %LET FinalMean = DIAB001; data WORK.inclusterdat_random9; set &ClusterGrp.; run; /*STANDARD CLUSTERING ALGORITHM*/ /*STEP 3*/ /*Measure Euclidean distance between data points*/ proc distance data=WORK.inclusterdat_random9 out=distancedat method=Euclid; var interval(METRIC); id contract_id; run; /*STEP 4*/ /*cluster creation using Ward's minimum variance*/ proc cluster data=distancedat method=ward /*trim=1 k=3*/ outtree=treedat noprint; id contract_id; run; /*STEP 5*/ /* replace NSTARS with numeric constant (# of star levels = 5)*/ proc tree data=treedat ncl=5 horizontal out=outclusterdat noprint; id contract_id; run; /*STEP 6*/ /*Join measure score to distinct cluster groupings*/ proc sql; CREATE TABLE WORK.JOIN AS SELECT a.*, b.METRIC FROM work.outclusterdat a INNER JOIN WORK.inclusterdat_random9 b ON a.contract_id = b.contract_id order by CLUSTER DESC; /*STEP7*/ /*Create upper bound threshold cutoff*/ /*NOTE***Cluster output is naturally unordered so it needs to be reordered depending on measure (higher=better or lower=better)***/ /*NAME OUT FILE TO NEW DATASET HERE */ /*This step needs to by run multiple times to create the followig files - */ proc sql; CREATE TABLE &Finalf_. AS SELECT DISTINCT cluster, MIN(METRIC) AS LOWER_BOUND FROM WORK.JOIN WHERE cluster BETWEEN 1 AND 5 GROUP BY cluster ORDER BY LOWER_BOUND ASC;