@IanWakeling ... continuing with the subscript theme:
I need to create a idx matrix dynamically.
X2=X[(1:5)||(20:30),];
In the above matrices, the last one determines how many elements get selected. This gets applied to the first matrix where row1 and row2 are the start end index.
Here I want to select:
X2=X[(1:18) || (49:(49+59) || ...];
I can solve it with a do loop, but it there a more elegant way to achieve this:
H3 is the first matrix and H5 the last.
H6=DO(H3[1,1],H5[1],1);
DO I=2 TO NCOL(H5);
H7=DO(H3[1,I],H3[1,I] + H5[I] -1,1);
H6=H6 || H7;
END;
H6=H6`;
PRINT H6;
I can't think of any elegant way to avoid the loop. If H3 and H5 have a lot of columns, then it is generally not a good idea to 'grow' a matrix by concatenation within a loop, since it involves creating a new matrix for each iteration. The following avoid this issue by 1st declaring H6 to be large enough to hold everything.
H6 = j(sum(H5), 1);
e = 1;
do i = 1 to ncol(H3);
r = 0:(H5[i] - 1);
H6[ r + e ] = r + H3[1, i];
e = e + H5[i];
end;
I don't understand the question. Can you provide a program that shows what you have and what you want?
What I asked for is a small piece of the following problem I am encountering:
During the task of finding the best regression model, there's always a discussion going on what should be the scope of the data. I am NOT talking here about splitting the universe into training and validation data sets.
Here I allude to the fact that the data owner could recommend taking out certain observations before running the regression.
So I apply the corresponding filters and end up having the data set that feeds into the regression model.
This time I didn't save neither the final data set nor the filter. I only have the universe data set and the regression model estimates.
The task is to shuffle from the universe in such a manner that I approximate the estimates.
I've rebuilt my approach with the SASHELP.CARS data set.
My credit goes to @Ksharp for his many fantastic contributions in the regard of the Genetic Algorithms, especially the https://communities.sas.com/t5/SAS-IML-File-Exchange/Genetic-Algorithm-For-Integer-Programming-Footb....
And to @Rick_SAS for his Sweep operator trick.
Frankly spoken I do not really control the crossover and mutuation options in the GA block. But I am happy with the solution I found.
My code goes like this:
PROC GLM DATA=SASHELP.CARS PLOTS=ALL;
WHERE (UPCASE(Type) NE "HYBRID") * (Cylinders NE .);
CLASS TYPE ;
MODEL MPG_Highway = Cylinders Horsepower TYPE / SOLUTION;
OUTPUT OUT=PRED_CARS PREDICTED=PRED RESIDUAL=RES;
RUN;
PROC SORT DATA=PRED_CARS;
BY Type DESCENDING RES;
RUN;
PROC RANK DATA=WORK.PRED_CARS GROUPS=4 OUT=PRED_CARS1;
BY Type;
VAR RES;
RANKS R_RES;
RUN;
PROC FREQ DATA=WORK.PRED_CARS1;
TABLE TYPE * R_RES;
RUN;
DATA PRED_CARS2;
SET WORK.PRED_CARS1;
WHERE ((UPCASE(TYPE)="TRUCK")*(R_RES NE 0) +
(UPCASE(TYPE)="SPORTS")*(R_RES NE 3) +
(UPCASE(TYPE)="SEDAN")*(R_RES LT 2) +
(UPCASE(TYPE) NOT IN ("TRUCK" "SPORTS" "SEDAN")) );
RUN;
PROC FREQ DATA=WORK.PRED_CARS2;
TABLE TYPE * R_RES;
RUN;
ODS OUTPUT PARAMETERESTIMATES=PARMS;
PROC GLM DATA=WORK.PRED_CARS2 PLOTS=ALL;
CLASS TYPE (REF="SUV") ;
MODEL MPG_Highway = Cylinders Horsepower TYPE / SOLUTION;
RUN;
PROC TRANSREG DATA= SASHELP.CARS DESIGN;
WHERE (UPCASE(Type) NE "HYBRID") * (Cylinders NE .);
MODEL IDENTITY(LENGTH)
CLASS(TYPE / ZERO=NONE );
ID Cylinders Horsepower MPG_Highway Type;
OUTPUT OUT=UNIVERSE(DROP=LENGTH _:) ;
RUN;
DATA UNIVERSE;
FORMAT INTERCEPT 3.0 &_TRGIND;
SET WORK.UNIVERSE;
INTERCEPT=1;
RUN;
PROC SORT DATA=WORK.UNIVERSE;
BY Type;
RUN;
DATA WORK.UNIVERSE;
SET WORK.UNIVERSE;
BY TYPE;
IF FIRST.TYPE THEN I+1;
N=I;
DROP I;
RUN;
PROC SQL;
SELECT "'"||STRIP(NAME)||"'" INTO :VARS SEPARATED BY " "
FROM DICTIONARY.COLUMNS
WHERE LIBNAME="WORK" AND MEMNAME="UNIVERSE";
QUIT;
%PUT &VARS;
PROC IML;
USE WORK.UNIVERSE NOBS NOBS;
READ ALL VAR {N};
READ ALL VAR _NUM_ INTO VALS
[C=varNUMERIC];
READ ALL VAR {"TYPE"};
CLOSE UNIVERSE;
/*the objective values*/
OBJ_B1=-1.1609;
OBJ_B2=-0.022394;
OBJ_B0=33.4; /*this is i.e. the objective value for the intercept*/
OBJ_B3=4.99;
TEST=VALS[,{'INTERCEPT' 'TypeSedan' 'TypeSports'
'TypeTruck' 'TypeWagon' 'TypeSUV' 'Cylinders' 'Horsepower' 'MPG_Highway'}];
S=SWEEP(TEST`*TEST,1:NCOL(TEST)-1);
PRINT S;
START_END=LOC(N^=T({.}||REMOVE(N,NOBS))) //
LOC(N^=T(REMOVE(N,1)||{.}));
H2=START_END[2,]-START_END[1,]+1;
H3=START_END // H2 // CUSUM(H2);
ST_END=H3;
PRINT ST_END;
IDXH=DO(1,NCOL(ST_END)*2,2);
IDX_X=1:NCOL(ST_END)*2;
RANGE=IDX_X[,1:NCOL(ST_END)];
POS=IDX_X[,NCOL(ST_END)+1:NCOL(ST_END)*2];
IDER=(J(1, NCOL(ST_END),0.3) // J(1, NCOL(ST_END),1)) || (J(1, NCOL(ST_END),0) // J(1, NCOL(ST_END),1));
start football(x) global(VALS, ST_END,OBJ_B1,OBJ_B2, OBJ_B0, OBJ_B3, RANGE, POS,
MUESTRA, BETA1, BETA2, BETA0, BETA3, IDX );
RANGER=X[,RANGE];
POSR=X[,POS];
X2=CEIL(ST_END[3,]#RANGER);
IDX=DO(ST_END[1,1],X2[1],1) + FLOOR((ST_END[3,1]-X2[1])*POSR[1]);
DO I=2 TO NCOL(RANGE);
IDX1=DO(ST_END[1,I],ST_END[1,I] + X2[I] -1,1) + FLOOR((ST_END[3,I]-X2[I])*POSR[I]);
IDX=IDX || IDX1;
END;
IDX=IDX`;
MUESTRA=VALS[IDX,{'INTERCEPT' 'TypeSedan' 'TypeSports'
'TypeTruck' 'TypeWagon' 'TypeSUV' 'Cylinders' 'Horsepower' 'MPG_Highway'}];
S=SWEEP(MUESTRA`*MUESTRA,1:NCOL(MUESTRA)-1);
BETA1=S[NROW(S)-2,NCOL(S)];
BETA2=S[NROW(S)-1,NCOL(S)];
BETA0=S[1,NCOL(S)];
BETA3=S[NROW(S)-4,NCOL(S)];
POINTS = ABS((BETA1-OBJ_B1)/OBJ_B1) + ABS((BETA2-OBJ_B2)/OBJ_B2) + ABS((BETA0-OBJ_B0)/OBJ_B0)
+ ABS((BETA3-OBJ_B3)/OBJ_B3);
if NROW(MUESTRA) < 250 | ABS(BETA0 - OBJ_B0) > 1 then points=1;
return (points);
finish;
ENCOD=NCOL(ST_END)*2;
id=gasetup(1,ENCOD,123);
call gasetobj(id,0,"football");
call gasetcro(id,1.0,2);
call gasetmut(id,0.2,2,1);
call gasetsel(id,100,1,0.95);
call gainit(id,1000,IDER);
print id;
niter = 40;
summary = j(niter,3);
mattrib summary [c = {"Min Points", "A?", "B?"} l=""];
do i = 1 to niter;
call garegen(id);
call gagetval(value, id);
summary[i,1] = value[1];
summary[i,2] = value[2];
summary[i,3] = value[3];
end;
call gagetmem(mem, value, id, 1);
PRINT MEM BETA1 BETA2 BETA0 BETA3;
iteration = t(1:niter);
print iteration summary;
call gaend(id);
MUESTRA_CH=TYPE[IDX,];
CREATE MUESTRA_CH FROM MUESTRA_CH [COLNAME='TYPE'];
APPEND FROM MUESTRA_CH;
CLOSE;
CREATE MUESTRA FROM MUESTRA [COLNAME= {'INTERCEPT' 'TypeSedan' 'TypeSports'
'TypeTruck' 'TypeWagon' 'TypeSUV' 'Cylinders' 'Horsepower' 'MPG_Highway'}];
APPEND FROM MUESTRA;
CLOSE;
quit;
DATA MUESTRA_ALL;
MERGE MUESTRA MUESTRA_CH;
RUN;
ODS OUTPUT PARAMETERESTIMATES=PARMS;
PROC GLM DATA=WORK.MUESTRA PLOTS=ALL;
MODEL MPG_Highway = Cylinders Horsepower &_TRGIND / SOLUTION;
RUN;
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how to run multiple linear regression models with and without interactions, presented by SAS user Alex Chaplin.
Find more tutorials on the SAS Users YouTube channel.