Hey,
I have a piece of MATLAB code that generates normally distributed cluster data. Since I don't have MATLAB installed on my PC, I am thinking of translating it to SAS/IML. I only know a little MATLAB. Could anyone help with this? The MATLAB code is below. Thanks.
%
% Generate synthetic, normally distributed cluster data for a two-class
% problem and write a training set and a test set to disk via flatfile.
%
% Outline:
%   1. Draw nCenters random cluster centers and one covariance per center.
%   2. Draw a random hyperplane (w, gamma); each center gets class +1 or -1
%      according to the side of the plane it lies on.
%   3. For each dataset (training, then test) draw points around the
%      centers in batches, label every point with its center's class, and
%      append the batch [points, class] to the output file.

% Seed both generators so repeated runs produce the same data.
% NOTE(review): this is the legacy seeding syntax; it is kept unchanged so
% the random streams (and therefore the generated data) are not altered.
rand('state',91225);
randn('state',19481);

lowBound = -50;
highBound = 50;
nCenters = 20;
nCols = 32;
nRows = 20000;
% Rounded because the value is used as an array dimension and a loop
% counter; for nRows = 20000 this is 200.
nTestRows = round(0.01 * nRows);
nBufferPoints = 100000; % Maximum number of rows generated per batch
nExpandFactor = 10; % How much to stretch the covariance matrix
sTrainFile = 'outtrain.txt';
sTestFile = 'outtest.txt';

% Generate the centers according to a uniform distribution, rounded to
% integer coordinates in [lowBound, highBound].
mCenters = round(lowBound + rand(nCenters,nCols)*(highBound-lowBound));

% Generate the variances and covariances randomly to create a matrix for
% each center. Each covariance is built as R' * R from a random square
% matrix R, which makes it symmetric positive semidefinite.
cCovariance = cell(nCenters,1);
for iCenter = 1:nCenters
    mRootCovariance = nExpandFactor * ...
        rand(nCols,nCols)*(highBound-lowBound) / 50;
    cCovariance{iCenter} = mRootCovariance' * mRootCovariance;
end

% Determine what proportion of points will come from each center, then
% create a cdf to use in deciding which to generate.
vPointFraction = rand(nCenters,1);
vPointFraction = vPointFraction / sum(vPointFraction);
vPointCdf = cumsum(vPointFraction)'; % 1-by-nCenters running total
% Floating-point rounding can leave the last cumulative value slightly
% below 1. A uniform draw above it would then match no center and the point
% would be emitted as an all-zero row with class 0, so pin the top to 1.
vPointCdf(end) = 1;

% Create a random separating plane: w has entries in [-2, 2] and gamma is
% the offset, so a point x is classified by sign(x*w - gamma).
% NOTE(review): the variable gamma shadows MATLAB's built-in gamma function
% for the rest of the script; the name is kept as in the original.
w = -2 + rand(nCols,1)*4;
gamma = lowBound / 10 + rand * (highBound-lowBound)/10;

% Now choose which classes to which each center belongs. A center lying
% exactly on the plane (sign == 0) is put in class +1.
vCenterClasses = sign(mCenters * w - gamma);
vCenterClasses(vCenterClasses==0) = 1;

% Prepare output file
% NOTE(review): flatfile is an external helper not shown here; the third
% argument presumably selects create (0) versus append (1) -- confirm.
flatfile([],sTrainFile,0);
flatfile([],sTestFile,0);

% Now go through and begin generating random points.
% Do it twice: once for training, once for testing.
for nDataset = 1:2
    if (nDataset==1)
        nRowsLeft = nRows;
        sOutputFile = sTrainFile;
        nTotRows = nRows;
    else
        nRowsLeft = nTestRows;
        sOutputFile = sTestFile;
        nTotRows = nTestRows;
    end

    % Per-dataset counters
    nMisclass = 0;          % points on the wrong side of the plane
    nTrainingClass1 = 0;    % points labeled +1
    nTrainingClassm1 = 0;   % points labeled -1

    % Generate the dataset in batches of at most nBufferPoints rows.
    while (nRowsLeft > 0)
        disp(sprintf('Rows left = %d',nRowsLeft));
        nRowsNow = min(nBufferPoints,nRowsLeft);
        nRowsLeft = nRowsLeft - nRowsNow;
        mNewPoints = zeros(nRowsNow,nCols);
        vPointCenters = zeros(nRowsNow,1);

        % Determine which center each point should belong to. Walking the
        % cdf from the top down means the final value written for a point
        % is the smallest center index whose cdf value covers its draw.
        vRandomNumbers = rand(nRowsNow,1);
        for iCenter = nCenters:-1:1
            vCenterMatch = (vRandomNumbers <= vPointCdf(iCenter));
            vPointCenters(vCenterMatch) = iCenter;
        end

        % Create a vector of training classes for each point
        vTrainingClasses = zeros(nRowsNow,1);

        % Within each class, generate an appropriate number of random points.
        for iCenter = 1:nCenters
            vIndices = (vPointCenters==iCenter);
            nPoints = sum(vIndices);
            if nPoints == 0
                % No point in this batch was assigned to this center.
                continue;
            end
            vTrainingClasses(vIndices) = vCenterClasses(iCenter);
            mNewPoints(vIndices,:) = round( ...
                mvnrnd(mCenters(iCenter,:),cCovariance{iCenter},nPoints));

            % Count how many points are incorrectly classified, i.e. fall
            % on the other side of the plane from their center's class.
            vFitClass = sign(mNewPoints(vIndices,:) * w - gamma);
            vFitClass(vFitClass==0) = 1;
            nMisclass = nMisclass + sum(vFitClass~=vCenterClasses(iCenter));
        end %for

        % Output the data points to disk: one row per point, with the class
        % label as the last column.
        flatfile([mNewPoints vTrainingClasses],sOutputFile,1);
        nTrainingClass1 = nTrainingClass1 + sum(vTrainingClasses==1);
        nTrainingClassm1 = nTrainingClassm1 + sum(vTrainingClasses==-1);
    end %while

    disp(sprintf('Percent separable estimate = %4.2f%%\n',100*(1-nMisclass/nTotRows)));
    disp(sprintf('Number class 1 points = %d\n',nTrainingClass1));
    disp(sprintf('Number class -1 points = %d\n',nTrainingClassm1));
end %for-nDataset