The following code implements the shortest path approach:
/* Sample portfolio, one row per customer:
     Customer - customer identifier (character)
     PD       - predicted probability of default, entered as a percentage
     Def      - observed default indicator (0 or 1 in the rows below) */
data have;
   /* The colon modifier makes PD a blank-delimited (modified list input) field.
      Plain "PD percent." is formatted input and always consumes a fixed 6 columns,
      which only works while every value happens to be exactly 6 characters wide. */
   input Customer $ PD :percent8. Def;
   datalines;
0001 0.24% 0
0002 0.02% 0
0003 0.89% 0
0004 3.50% 0
0005 6.80% 1
0006 0.008% 0
0007 15.20% 1
0008 28.20% 1
0009 2.23% 0
0010 1.25% 1
;
/* Order the customers by ascending PD (in place). The optimization step that
   follows indexes customers by row number and treats a bucket as a run of
   consecutive rows i..j-1. */
proc sort data=have out=have;
   by PD;
run;

/* Number of buckets the sorted customers are split into. */
%let num_buckets=5;
/* Split the PD-sorted customers into &num_buckets buckets of consecutive rows so
   that the sum over buckets of abs(average PD - average default rate) is minimal.
   The split is obtained as a shortest path in a layered network:
     node <i,b>            = "bucket b starts at customer i"
     arc  <i,b> -> <j,b+1> = "bucket b holds customers i..j-1", weight error[i,j]
   Expects data set HAVE (customer, pd, def), sorted by pd, and macro variable
   num_buckets. */
proc optmodel;
   /* customers are indexed by their row number in the sorted data set */
   set CUSTOMERS;
   str customer_id {CUSTOMERS};
   num pd {CUSTOMERS};
   num def {CUSTOMERS};
   read data have into CUSTOMERS=[_N_] customer_id=customer pd def;

   /* one extra index past the last customer: the position where the last bucket ends */
   num dummy_customer init card(CUSTOMERS)+1;
   CUSTOMERS = CUSTOMERS union {dummy_customer};

   /* a pair <i,j> with i < j represents a candidate bucket holding customers i..j-1 */
   set CUSTOMER_PAIRS = {i in CUSTOMERS, j in CUSTOMERS: i < j};
   num avg_pd {<i,j> in CUSTOMER_PAIRS} =
      (sum {k in i..j-1} pd[k]) / (j-i);
   num avg_def {<i,j> in CUSTOMER_PAIRS} =
      (sum {k in i..j-1} def[k]) / (j-i);
   /* cost of a candidate bucket: gap between its mean PD and its mean default flag */
   num error {<i,j> in CUSTOMER_PAIRS} =
      abs(avg_pd[i,j] - avg_def[i,j]);

   /* one network node per (customer, layer); layers run from 1 to &num_buckets+1 */
   set CUSTOMERS_BUCKETS = CUSTOMERS cross 1..&num_buckets+1;
   num node_id {CUSTOMERS_BUCKETS};
   set NODES = 0..card(CUSTOMERS_BUCKETS)-1;
   num node_to_customer {NODES};
   /* number the nodes 0,1,2,... and remember which customer each node belongs to */
   num id init 0;
   for {<i,b> in CUSTOMERS_BUCKETS} do;
      node_id[i,b] = id;
      node_to_customer[id] = i;
      id = id + 1;
   end;

   /* Arcs go from layer b to layer b+1 and from customer i to a later customer j.
      Declared with SETOF instead of growing the set with one UNION per arc inside
      nested loops; the resulting arcs and weights are the same. */
   set <num,num> ARCS =
      setof {<i,j> in CUSTOMER_PAIRS, b in 1..&num_buckets} <node_id[i,b],node_id[j,b+1]>;
   /* the weight of an arc is the error of the bucket it represents */
   num weight {<u,v> in ARCS} = error[node_to_customer[u],node_to_customer[v]];

   set <num,num,num,num,num> PATHS; /* source, sink, order, from, to */
   /* start at customer 1 in layer 1, finish at the dummy customer in the last layer,
      so every source-sink path consists of exactly &num_buckets arcs (= buckets) */
   set SOURCES = {node_id[1,1]};
   set SINKS = {node_id[dummy_customer,&num_buckets+1]};
   solve with network /
      direction = directed
      links = (weight=weight)
      shortpath = (source=SOURCES sink=SINKS)
      out = (sppaths=PATHS)
   ;
   put _NETWORK_OBJECTIVE_=;
   put PATHS;

   /* per-bucket results; cutoff is the highest pd value in each bucket */
   num cutoff {1..&num_buckets};
   num avg_pd_b {1..&num_buckets};
   num avg_def_b {1..&num_buckets};
   num error_b {1..&num_buckets};
   num customer_from, customer_to;
   /* customers in each bucket */
   set <str> CUSTOMERS_b {1..&num_buckets};

   /* An empty PATHS means the sink is unreachable (for example, fewer customers
      than buckets). Report that instead of printing parameters that were never
      assigned. */
   if card(PATHS) = 0 then
      put "ERROR: No partition into &num_buckets buckets was found; check that HAVE has at least &num_buckets rows.";
   else do;
      /* the k-th arc of the path (order = k) describes bucket k */
      for {<source, sink, order, from, to> in PATHS} do;
         customer_from = node_to_customer[from];
         customer_to = node_to_customer[to];
         /* the bucket ends just before customer_to, so its largest pd is at customer_to-1 */
         cutoff[order] = pd[customer_to-1];
         avg_pd_b[order] = avg_pd[customer_from,customer_to];
         avg_def_b[order] = avg_def[customer_from,customer_to];
         error_b[order] = error[customer_from,customer_to];
      end;
      print cutoff avg_pd_b avg_def_b error_b;

      for {<source, sink, order, from, to> in PATHS}
         CUSTOMERS_b[order] = setof {k in node_to_customer[from]..node_to_customer[to]-1} customer_id[k];
      for {b in 1..&num_buckets} put CUSTOMERS_b[b]=;
   end;
quit;
[1]  cutoff   avg_pd_b  avg_def_b  error_b
1    0.00008  0.00008   0.00000    0.00008
2    0.00020  0.00020   0.00000    0.00020
3    0.00240  0.00240   0.00000    0.00240
4    0.00890  0.00890   0.00000    0.00890
5    0.28200  0.09530   0.66667    0.57137
... View more