Rob, I appreciate your response as I am very interested in creating an OPTMODEL for this. However, I think that the result is slightly different from what I was actually looking for (my fault) and have clarified my question in a reply in a previous post. If you get a chance to modify for the output I am looking for I would appreciate it.
Basically, instead of combing the sum of all variables together, I want to treat them independently, and make the mean (or sum) of each variable similar between the 4 groups.
So for example, group1 would have a mean of 10 for "x", 15 for "y" and 12 for "z".
Group2 would have a mean of 11 for "x", 14 for "y" and 13 for "z" (so that the means of each variable are similar among groups).
This is for designing a pilot test, so the means or sums of each group need to be similar to remove variability from the design experiment.
Thanks,
Tom
So you have multiple objectives. Here is one way to combine them into a single objective that minimizes the sum of the ranges:
data have;
input x1-x3;
datalines;
10 15 8
11 14 9
12 16 10
9 16 8
8 12 10
8 10 11
11 14 12
13 10 9
8 16 15
10 11 8
11 15 14
16 13 10
10 15 14
12 8 14
7 12 13
9 14 10
;
%let num_vars = 3;
%let num_groups = 4;
proc optmodel;
set VARS = 1..&num_vars;
set GROUPS = 1..&num_groups;
set OBS;
num a {OBS, VARS};
read data have into OBS=[_N_] {j in VARS} <a[_N_,j]=col('x'||j)>;
/* Assign[i,g] = 1 if observation i assigned to group g, 0 otherwise */
var Assign {OBS, GROUPS} binary;
con AssignOnce {i in OBS}:
sum {g in GROUPS} Assign[i,g] = 1;
con NearlyEqual {g in GROUPS}:
floor(card(OBS)/&num_groups) <= sum {i in OBS} Assign[i,g] <= ceil(card(OBS)/&num_groups);
impvar GroupSum {g in GROUPS, j in VARS} = sum {i in OBS} a[i,j] * Assign[i,g];
var MinSum {VARS}, MaxSum {VARS};
con MinSumCon {g in GROUPS, j in VARS}:
MinSum[j] <= GroupSum[g,j];
con MaxSumCon {g in GROUPS, j in VARS}:
MaxSum[j] >= GroupSum[g,j];
impvar Range {j in VARS} = MaxSum[j] - MinSum[j];
min Objective = sum {j in VARS} Range[j];
solve;
print Assign;
print GroupSum;
num groupID {OBS};
for {i in OBS} do;
for {g in GROUPS: Assign[i,g].sol > 0.5} do;
groupID[i] = g;
leave;
end;
end;
create data want(drop=i) from [i] {j in VARS} <col('x'||j)=a[i,j]> groupID;
quit;
The resulting optimal solution has an objective value of (42 - 41) + (55 - 52) + (44 - 43) = 5:
SAS Output
Assign | ||||
---|---|---|---|---|
1 | 2 | 3 | 4 | |
1 | 1 | 0 | 0 | 0 |
2 | 0 | 1 | 0 | 0 |
3 | 0 | 1 | 0 | 0 |
4 | 0 | 0 | 0 | 1 |
5 | 0 | 0 | 1 | 0 |
6 | 0 | 1 | 0 | 0 |
7 | 0 | 0 | 0 | 1 |
8 | 1 | 0 | 0 | 0 |
9 | 0 | 0 | 1 | 0 |
10 | 0 | -0 | 1 | 0 |
11 | 1 | 0 | 0 | 0 |
12 | 0 | 0 | 1 | 0 |
13 | 0 | 1 | 0 | 0 |
14 | 0 | 0 | 0 | 1 |
15 | 1 | 0 | 0 | 0 |
16 | 0 | 0 | -0 | 1 |
GroupSum | |||
---|---|---|---|
1 | 2 | 3 | |
1 | 41 | 52 | 44 |
2 | 41 | 55 | 44 |
3 | 42 | 52 | 43 |
4 | 41 | 52 | 44 |
If the variables have different scales, you might want to instead minimize a weighted sum of ranges:
min Objective = sum {j in VARS} weight[j] * Range[j];
For example, you might take the weight of variable j to be 1 / max {i in OBS} a[i,j].
Thank you for the quick response!
So in your example, if I did want to use the weighted variable, I would have to add something like this:
impvar Weight {j in VARS} = 1 / max {i in OBS} a[i,j];
min Objective = sum {j in VARS} weight[j] * Range[j];
I think I'm missing something still because I get a message in the log after the "impvar" statement saying "WARNING 832-782: The defining expression does not depend on variables. Please use a parameter instead."
num weight {j in VARS} = 1 / max {i in OBS} a[i,j];
I was hoping it was that simple of a change...Thank you!
The following code is for any number of groups. Just change it at : group=5; /*divide into 5 groups*/ NOTE: the obs with zeros in all X Y Z are dummy obs. You can ignore them. data have; input x y z; datalines; 10 15 8 11 14 9 12 16 10 9 16 8 8 12 10 8 10 11 11 14 12 13 10 9 8 16 15 10 11 8 11 15 14 16 13 10 10 15 14 12 8 14 7 12 13 9 14 10 ; run; proc iml; use have; read all var _num_ into age[c=vname] ; close ; start function(x) global(age,group,nrow); if ncol(x) ^= ncol(unique(x)) then x=ranperm(nrow,1); y=shape(x,group,0,0); sum_group=j(group,1,.); do i=1 to group; sum_group[i]=age[y[i,],][+]; end; obj=range(sum_group); return (obj); finish; start switch_mut(s) global(nswitches,nrow); if ncol(s) ^= ncol(unique(s)) then s=ranperm(nrow,1); else do; n = ncol(s); do i = 1 to nswitches; k1 = int(uniform(1234)*n) + 1; k2 = int(uniform(1234)*n) + 1; if k1^=k2 then do; temp=s[k2]; s[k2] = s[k1]; s[k1] = temp; end; end; end; finish; start uniform_cross(child1, child2, parent1, parent2) global(nrow); child1 = parent1; child2 = parent2; do i = 1 to ncol(parent1); r = uniform(1234); if r<=0.5 then do; child1[i] = parent2[i]; child2[i] = parent1[i]; end; end; if ncol(child1) ^= ncol(unique(child1)) then child1=ranperm(nrow,1); else if ncol(child2) ^= ncol(unique(child2)) then child2=ranperm(nrow,1); finish; nrow=nrow(age); obs=t(1:nrow); group=5; /*divide into 5 groups*/ mod=mod(nrow,group); if mod^=0 then do; remain=group-mod; age=age//j(remain,ncol(age),0); nrow=nrow(age); obs=t(1:nrow); end; nswitches = 3; encoding=j(2,nrow,1); encoding[2,]=nrow; id=gasetup(2,nrow,1234); call gasetobj(id,0,"function"); call gasetcro(id,0.95,0,"uniform_cross"); call gasetmut(id,0.95,0,"switch_mut"); call gasetsel(id,100,1,1); call gainit(id,1000,encoding); niter = 10000; do i = 1 to niter; call garegen(id); call gagetval(value, id); end; call gagetmem(mem, value, id, 1); y=shape(mem,group,0,0); sum_group=j(group,2,.); do i=1 to group; sum_group[i,1]=i; sum_group[i,2]=age[y[i,],][+]; end; Memebers=colvec(row(y))||obs[mem,]||age[mem,]; vnames={group obs}||vname; print "Members:" Memebers[c=vnames], "Group sum:" sum_group[c={group sum}], "Min Value: " value[l = ""] ; call gaend(id); quit; OUTPUT: GROUP OBS x y z 1 14 12 8 14 1 12 16 13 10 1 17 0 0 0 1 4 9 16 8 2 10 10 11 8 2 6 8 10 11 2 8 13 10 9 2 5 8 12 10 3 7 11 14 12 3 2 11 14 9 3 20 0 0 0 3 13 10 15 14 4 11 11 15 14 4 19 0 0 0 4 16 9 14 10 4 1 10 15 8 5 3 12 16 10 5 18 0 0 0 5 9 8 16 15 5 15 7 12 13 GROUP SUM 1 106 2 120 3 110 4 106 5 109 Min Value: 14
With five groups, the PROC OPTMODEL code finds that 14 is the minimum range:
[1] | GroupSum |
---|---|
1 | 107 |
2 | 110 |
3 | 108 |
4 | 106 |
5 | 120 |
Rob,
OR well done. I can't image SAS/OR can do solve so many different kind of challenge .
All,
I have modified my question slightly since I don't believe I explained it properly the first time.
I am trying to create an experiment and want to design it in a way that I have 4 groups, that are similar among a few variables. These variables can be different things, so "x" could be "# of site visits", and "y" could be "conversion rate" for example. So the scale could be vastly different for each variable (e.g. x=10, y=15%, z= 1,000). I am not trying to sum the 3 variables together, I want to treat them individually and have similar variable means (or sums) across groups.
So for Group1, I want "x" to have a similar mean as Group2,3,and 4's "x" variable. Same with the "y" and "z" variables as well.
I hope this helps, sorry for the misunderstanding (I will try and be more clear explaining the example in the future).
Thanks,
Tom
Of course. I would not miss this kind of question.
Change
group=4; /*divide into 5 groups*/
for any number of groups .
data have;
input x y z;
datalines;
10 15 8
11 14 9
12 16 10
9 16 8
8 12 10
8 10 11
11 14 12
13 10 9
8 16 15
10 11 8
11 15 14
16 13 10
10 15 14
12 8 14
7 12 13
9 14 10
;
run;
proc iml;
use have;
read all var _num_ into age[c=vname] ;
close ;
start function(x) global(age,group,nrow,ncol);
if ncol(x) ^= ncol(unique(x)) then x=ranperm(nrow,1);
y=shape(x,group,0,0);
sum_group=j(group,ncol,.);
do i=1 to group;
sum_group[i,]=age[y[i,],][+,];
end;
obj=sum(sum_group[<>,]-sum_group[><,]);
return (obj);
finish;
start switch_mut(s) global(nswitches,nrow);
if ncol(s) ^= ncol(unique(s)) then s=ranperm(nrow,1);
else do;
n = ncol(s);
do i = 1 to nswitches;
k1 = int(uniform(1234)*n) + 1;
k2 = int(uniform(1234)*n) + 1;
if k1^=k2 then do;
temp=s[k2];
s[k2] = s[k1];
s[k1] = temp;
end;
end;
end;
finish;
start uniform_cross(child1, child2, parent1, parent2) global(nrow);
child1 = parent1;
child2 = parent2;
do i = 1 to ncol(parent1);
r = uniform(1234);
if r<=0.5 then do;
child1[i] = parent2[i];
child2[i] = parent1[i];
end;
end;
if ncol(child1) ^= ncol(unique(child1)) then child1=ranperm(nrow,1);
else if ncol(child2) ^= ncol(unique(child2)) then child2=ranperm(nrow,1);
finish;
ncol=ncol(age);
nrow=nrow(age);
obs=t(1:nrow);
group=4; /*divide into 5 groups*/
mod=mod(nrow,group);
if mod^=0 then do;
remain=group-mod;
age=age//j(remain,ncol,0);
nrow=nrow(age);
obs=t(1:nrow);
end;
nswitches = 3;
encoding=j(2,nrow,1);
encoding[2,]=nrow;
id=gasetup(2,nrow,123456789);
call gasetobj(id,0,"function");
call gasetcro(id,0.95,0,"uniform_cross");
call gasetmut(id,0.95,0,"switch_mut");
call gasetsel(id,100,1,1);
call gainit(id,1000,encoding);
niter = 10000;
do i = 1 to niter;
call garegen(id);
call gagetval(value, id);
end;
call gagetmem(mem, value, id, 1);
y=shape(mem,group,0,0);
sum_group=j(group,ncol+1,.);
do i=1 to group;
sum_group[i,1]=i;
sum_group[i,2:ncol+1]=age[y[i,],][+,];
end;
Memebers=colvec(row(y))||obs[mem,]||age[mem,];
vnames={group obs}||vname;
print Memebers[c=vnames l = ""],
sum_group[c=({group}||('sum_'+vname)) l = ""],
"Min Value: " value[l = ""] ;
call gaend(id);
quit;
Hi
The problem seems to be one of Stratification as in Stratified Random Sampling.
If so then you may also consider using PROC SURVEYSELECT and / or its variations
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.