I would recommend BY-group processing by using the WHERE clause on the READ statement, as shown in this blog post:
In addition, you can use the "SETIN" and "SETOUT" statements to have two data sets open simultaneously and read and write to them in a loop. Refer to this blog post for a review of the available options:
Using the above techniques, you can remove the macro and the macro loop and instead use a pure IML loop over the number of BY-groups, as determined by PROC FREQ.
I have rewritten your program and modified your dataset to cover a more general case:
/* Sample input data: five numeric variables read with list input.
   The pair (group, lag2cvrank) is what the later steps use to split
   the rows into BY groups. */
data untrimmed;
   input group lag2cvrank lagcfo_ts cfo_ts lag2cfo_ts;
   datalines;
1991 0 155 175 165
1991 0 200 225 250
1991 0 75 125 135
1991 0 350 375 400
1991 1 155 175 165
1991 1 200 225 250
1991 1 75 125 135
1992 0 155 175 165
1992 0 200 225 250
1992 0 75 125 135
1992 0 350 375 400
1992 2 155 175 165
1992 2 200 225 250
1992 2 75 125 135
;
/* Enumerate the (group, lag2cvrank) combinations that occur in the data.
   The OUT= data set FreqOut is read later to drive the BY-group loop. */
proc freq data=untrimmed;
   tables group*lag2cvrank / out=FreqOut;
run;
proc iml;
/* Suggest using the Theil-Sen function
https://blogs.sas.com/content/iml/2019/05/28/theil-sen-robust-regression.html
because it handles the case of infinite slope.*/

/* compute_slopes: median-of-pairwise-slopes (Theil-style) line fit for one
   BY group.

   Input
     XY : numeric matrix with at least 4 columns and at least 1 row.
          col 1 = x, col 2 = y (as used by the slope formula below),
          col 3 = group id, col 4 = lag2cvrank id. The ids are taken from
          the first row only, so all rows are expected to share them.

   Output
     nrow(XY) x 5 matrix:  intercept || m*XY[,2] || sum of the first two ||
                           group id  || lag2cvrank id
     The intercept and the two id columns are constant down the rows.

   Degenerate input
     If the group has fewer than two rows, or every pair of rows has the
     same x value, no finite slope exists. The slope and intercept are then
     missing (.), and the first three output columns are all missing. The
     id columns are still filled in so the caller can append the rows.

   Side effect: prints the intercept and slope for the group. */
start compute_slopes(XY);
   t = nrow(XY);                 /* number of observations in this group */
   group      = XY[1,3];         /* BY-variable values, from the first row */
   lag2cvrank = XY[1,4];

   m = .;                        /* slope; stays missing if it cannot be estimated */
   b = .;                        /* intercept */

   if t >= 2 then do;            /* allcomb(t,2) needs at least two rows */
      c  = allcomb(t, 2);        /* all "t choose 2" pairs of row indices */
      dx = XY[c[,1],1] - XY[c[,2],1];      /* x1 - x2 for every pair */
      dy = XY[c[,1],2] - XY[c[,2],2];      /* y1 - y2 for every pair */
      finiteIdx = loc(dx ^= 0);            /* skip vertical segments: no division by zero */
      if ncol(finiteIdx) > 0 then do;
         m = median( dy[finiteIdx] / dx[finiteIdx] );   /* scalar */
         b = median( XY[,2] - m*XY[,1] );               /* median(y - m*x), scalar */
      end;
   end;

   int_col = J(t, 1, b);         /* intercept repeated for every row */
   if m = . then
      mxy_col = J(t, 1, .);      /* no slope: keep the column missing */
   else
      /* NOTE(review): the slope was fitted with column 1 as x, but it is
         applied to column 2 here. Kept as in the original; confirm this is
         the intended (one-step-ahead) use rather than m*XY[,1]. */
      mxy_col = m*XY[,2];
   p_col = int_col + mxy_col;    /* missing propagates when there is no fit */

   print (b||m)[c={'Intercept' 'Slope'} L="Method=Theil Combs=All"];

   return (int_col || mxy_col || p_col || J(t, 1, group) || J(t, 1, lag2cvrank));
finish;
/* Driver: run compute_slopes once per (group, lag2cvrank) combination and
   write each group's result rows to the data set ts_1. */
/* read the BY groups: NOBS stores the number of rows of FreqOut in NumGroups,
   and READ creates the IML vectors group and lag2cvrank (one element per BY group) */
use FreqOut nobs NumGroups;
read all var {group lag2cvrank};
close FreqOut;
print NumGroups, group lag2cvrank;
/* open the input data and create the output data set so both are open at once */
use work.untrimmed;
/* NOTE(review): int, mxy, p, group_col and lag2cvrank_col are first assigned
   inside the loop below, i.e. after this CREATE runs - confirm that CREATE
   accepts VAR names whose matrices are not yet defined. */
create ts_1 var {int mxy p group_col lag2cvrank_col};
/* make untrimmed the current input data set and ts_1 the current output data set */
setin work.untrimmed;
setout ts_1;
/* column order matters: compute_slopes uses columns 1-2 for the fit and 3-4 as ids */
inVarNames = {"lag2cfo_ts" "lagcfo_ts" "group" "lag2cvrank"};
do i = 1 to NumGroups; /* for each BY group */
/* in the WHERE clause the left-hand names are data set variables; the
   parenthesized right-hand sides are the i_th elements of the IML vectors */
read all var inVarNames into XY
where(group=(group[i]) & lag2cvrank=(lag2cvrank[i]));
/* XY contains data for i_th group; analyze it */
G = compute_slopes(XY);
/* extract the columns of the matrix into the vectors named in the CREATE statement */
int=G[,1]; mxy=G[,2]; p=G[,3]; group_col=G[,4]; lag2cvrank_col=G[,5];
/* write this group's rows to the current output data set (ts_1) */
append;
end;
close work.untrimmed;
close ts_1;
... View more