The following PROC OPTMODEL code reads the additional cust_cnt column, handles the lower and upper limits on customers per bucket, and minimizes your new objective. It also improves efficiency in several places, for example by recursively computing avg_def.
data have;
call streaminit(1234);
do c=1 to 99;
customer = put(c,z4.);
pd=rand('uniform');
def=rand('bern',0.5);
cust_cnt=ceil(10*rand('uniform'));
output;
end;
format pd percent8.2;
run;
proc sort data=have;
by pd;
run;
%let num_buckets = 5;
%let pct_lb = 0.01;
%let pct_ub = 0.25;
proc optmodel;
/* read data */
set OBS;
str obs_id {OBS};
num pd {OBS};
num def {OBS};
num cust_cnt {OBS};
read data have into OBS=[_N_] obs_id=customer pd def cust_cnt;
/* build network */
num dummy_obs init card(OBS)+1;
OBS = OBS union {dummy_obs};
num num_cust_total = sum {i in OBS diff {dummy_obs}} cust_cnt[i];
num num_cust_lb = ceil( &pct_lb*num_cust_total);
num num_cust_ub = floor(&pct_ub*num_cust_total);
set <num,num> OBS_PAIRS init {};
num num_cust_ij {OBS_PAIRS};
num count;
for {i in OBS} do;
count = 0;
for {j in OBS: i < j} do;
count = count + cust_cnt[j-1];
if count < num_cust_lb then continue;
else if count > num_cust_ub then leave;
else do;
OBS_PAIRS = OBS_PAIRS union {<i,j>};
num_cust_ij[i,j] = count;
end;
end;
end;
/* avg_def[i,j] = (sum {k in i..j-1} def[k] * cust_cnt[k]) / num_cust_ij[i,j] */
num avg_def {<i,j> in OBS_PAIRS} = (
if <i,j-1> not in OBS_PAIRS then (sum {k in i..j-1} def[k] * cust_cnt[k]) / num_cust_ij[i,j]
else (avg_def[i,j-1] * num_cust_ij[i,j-1] + def[j-1] * cust_cnt[j-1]) / num_cust_ij[i,j]
);
num error {<i,j> in OBS_PAIRS} = sum {k in i..j-1} abs(pd[k] - avg_def[i,j]);
set OBS_BUCKETS = OBS cross 1..&num_buckets+1;
num node_id {OBS_BUCKETS};
set NODES = 0..card(OBS_BUCKETS)-1;
num node_to_obs {NODES};
num id init 0;
for {<i,b> in OBS_BUCKETS} do;
node_id[i,b] = id;
node_to_obs[id] = i;
id = id + 1;
end;
set ARCS =
setof {<(1),j> in OBS_PAIRS, b in {1}} <node_id[1,b],node_id[j,b+1]>
union
setof {<i,j> in OBS_PAIRS, b in 2..&num_buckets-1: i ne 1 and j ne dummy_obs} <node_id[i,b],node_id[j,b+1]>
union
setof {<i,(dummy_obs)> in OBS_PAIRS, b in {&num_buckets}} <node_id[i,b],node_id[dummy_obs,b+1]>;
num weight {<from,to> in ARCS} = error[node_to_obs[from],node_to_obs[to]];
set <num,num,num,num,num> PATHS; /* source, sink, order, from, to */
set SOURCES = {node_id[1,1]};
set SINKS = {node_id[dummy_obs,&num_buckets+1]};
/* call network solver using shortest path algorithm */
solve with network /
direction = directed
links = (weight=weight)
shortpath = (source=SOURCES sink=SINKS)
out = (sppaths=PATHS)
;
put _NETWORK_OBJECTIVE_=;
put PATHS;
/* highest pd value in each bucket */
num cutoff {1..&num_buckets};
num avg_def_b {1..&num_buckets};
num error_b {1..&num_buckets};
num num_cust_b {1..&num_buckets};
num pct_cust_b {b in 1..&num_buckets} = num_cust_b[b] / num_cust_total;
set <str> OBS_b {1..&num_buckets};
num obs_from, obs_to;
for {<source, sink, order, from, to> in PATHS} do;
obs_from = node_to_obs[from];
obs_to = node_to_obs[to];
cutoff[order] = pd[obs_to-1];
avg_def_b[order] = avg_def[obs_from,obs_to];
error_b[order] = error[obs_from,obs_to];
num_cust_b[order] = num_cust_ij[obs_from,obs_to];
OBS_b[order] = setof {k in node_to_obs[from]..node_to_obs[to]-1} obs_id[k];
end;
print _NETWORK_OBJECTIVE_ cutoff avg_def_b error_b num_cust_b pct_cust_b;
/* observations in each bucket */
/* for {b in 1..&num_buckets} put OBS_b[b]=;*/
quit;
SAS Output
_NETWORK_OBJECTIVE_
23.104
[1]
cutoff
avg_def_b
error_b
num_cust_b
pct_cust_b
1
0.22811
0.62016
12.4959
129
0.24157
2
0.48268
0.39231
1.7456
130
0.24345
3
0.71467
0.54054
1.4654
111
0.20787
4
0.94800
0.63025
4.6940
119
0.22285
5
0.99839
0.53333
2.7026
45
0.08427
... View more