I am looking for a SAS Macro that provides Collinearity Diagnostics Using the Information Matrix within Binary Logistic Regression.
I believe that such a macro was constructed in 2003 and then updated in 2010.
However, after searching via Google, LinkedIn and SAS.Support, I have only been able to find references to the macro
but not the actual code or any documentation on this macro.
I would be most grateful if someone could supply me with a well documented and functionally robust version of this SAS macro.
Thank You
Found one that worked. I have no idea whether it is statistically correct, only that it runs without program errors.
Macro definition:
/*----------------------------------------------------------------------
  COLLIN -- collinearity diagnostics for nonlinear models, computed
  from the variance-covariance matrix of the parameter estimates.

  Parameters
    covdsn=    Data set holding the covariance matrix (required).
               LOGISTIC / PHREG: the OUTEST= data set written with the
               COVOUT option.  GENMOD: the covariance table, which must
               carry a ROWNAME column of Prm<n> labels.
    procdr=    GENMOD when COVDSN= comes from GENMOD; any other value
               (including blank) is handled as LOGISTIC / PHREG.
    parminfo=  GENMOD only: data set with PARAMETER (Prm<n>) and EFFECT
               columns, used to translate Prm<n> labels into effect
               names.

  Output
    Prints the input matrix, then builds and prints data set FINAL2
    holding eigenvalues (EIGENVAL), condition indexes (CONDINDX) and
    one row of variance decomposition proportions per parameter.
    Work data sets NEXT_1 .. NEXT_5 are removed at the end.

  Requires PROC IML.

  NOTE(review): the GENMOD branch relies on zero-padded Prm<n> labels
  sorting in parameter order, which only holds for up to 99
  parameters -- confirm before using it on larger models.
----------------------------------------------------------------------*/
%macro collin(covdsn=, procdr=, parminfo=);
  /* Keep the IML-to-macro flag out of the caller's symbol tables. */
  %local __stop;
  %let __stop = 0;

  %if %length(%superq(covdsn)) = 0 %then %do;
    /* No input data set was named.  The original test compared the  */
    /* literal next_5 to blank, so this branch could never be taken. */
    %put;
    %put "*******************************************************";
    %put "When you invoke this macro, you have to specify the name";
    %put "of a SAS data set that contains the variance-covariance";
    %put "matrix from LOGISTIC, PHREG, or GENMOD.";
    %put;
    %put "For more information, see the macro code (comments";
    %put "are included with instructions.";
    %put "*******************************************************";
    %put;
  %end;
  %else %if %upcase(&procdr) = GENMOD and %length(%superq(parminfo)) = 0 %then %do;
    /* A blank PARMINFO= would turn into a bare SET statement and    */
    /* silently read the most recently created data set.             */
    %put ERROR: COLLIN: PARMINFO= is required when PROCDR=GENMOD.;
  %end;
  %else %do;

    /* Drop any FINAL2 left by an earlier call so a failed PROC IML  */
    /* step cannot lead to stale results being printed below.        */
    proc datasets nolist nowarn;
      delete final2;
    run;
    quit;

    %if %upcase(&procdr) = GENMOD %then %do;

      /* Lookup table: Prm<n> label -> effect name.  Prm1-Prm9 are   */
      /* zero-padded so a character sort follows parameter order.    */
      data next_1;
        set &parminfo;
        attrib parnum length=$25 format=$25.;
        parnum = parameter;
        if length(parnum) = 4
           and substr(parnum, 1, 3) = 'Prm'
           and '1' <= substr(parnum, 4, 1) <= '9'
          then parnum = 'Prm0' || substr(parnum, 4, 1);
        rename parnum = parm;
      run;

      proc sort data=next_1;
        by parm;
      run;

      /* Covariance rows, keyed by the same zero-padded label. */
      data next_1a;
        set &covdsn;
        attrib parm length=$25 format=$25.;
        parm = rowname;
        if length(parm) = 4
           and substr(parm, 1, 3) = 'Prm'
           and '1' <= substr(parm, 4, 1) <= '9'
          then parm = 'Prm0' || substr(parm, 4, 1);
      run;

      proc sort data=next_1a;
        by parm;
      run;

      /* Swap each Prm<n> label for its effect name and store it in  */
      /* _NAME_, the column the IML step reads row labels from.      */
      data next_2(drop=effect);
        merge next_1a(in=in1a)
              next_1 (in=in1);
        by parm;
        if in1a;
        parm = effect;
        rename parm = _name_;
      run;

      /* Rows labelled SCALE are excluded from the matrix. */
      data next_3;
        set next_2;
        if _name_ = 'SCALE' then delete;
      run;

      /* Placeholder first row: the IML step always skips row 1. */
      data next_4;
        length _name_ $25;
        _name_ = 'ESTIMATE';
        output;
      run;

      data next_5;
        set next_4
            next_3;
      run;

      proc print data=next_5;
        title "Input dataset--GENMOD";
      run;
    %end;
    %else %do;
      data next_5;
        set &covdsn;
      run;

      proc print data=next_5;
        title "Input dataset--LOGISTIC/PHREG";
      run;
    %end;

    proc iml;
      use next_5;
      read all var {_name_} into _varname;
      _nrvname = nrow(_varname);

      if _nrvname > 1 then do;
        /* Row 1 is skipped: it is the ESTIMATE placeholder in the   */
        /* GENMOD path and presumably the estimates row otherwise.   */
        _varnam2 = _varname[2:_nrvname, ];
        nmissing = j(nrow(_varnam2), 1, .);
        labels   = {"EIGENVAL", "CONDINDX", " "};
        _varnam2 = labels // _varnam2;
        free _varname labels;

        read all var _num_ into varcov[colname=_nvname];
        _nrcvc   = ncol(varcov);
        lastvnam = _nvname[1, _nrcvc];

        /* A trailing _LNLIKE_ column is not part of the matrix. */
        if lastvnam = "_LNLIKE_" then
          varcov2 = varcov[2:_nrvname, 1:_nrcvc-1];
        else
          varcov2 = varcov[2:_nrvname, ];

        /* GENMOD with a repeated-measures design is said to store   */
        /* correlations below the diagonal and covariances above it. */
        /* Copy the upper triangle onto the lower one so the matrix  */
        /* is symmetric; an already symmetric matrix is unchanged.   */
        vc2_c = ncol(varcov2);
        vc2_r = nrow(varcov2);
        do cl = 1 to vc2_c;
          do rw = cl + 1 to vc2_r;
            varcov2[rw, cl] = varcov2[cl, rw];
          end;
        end;
        print varcov2;
        free varcov _nrcvc lastvnam vc2_c vc2_r cl;

        /* Invert the covariance matrix and rescale the result to a  */
        /* unit diagonal before the eigen-decomposition.             */
        covbinv = inv(varcov2);
        scale   = inv(sqrt(diag(covbinv)));
        r       = scale * covbinv * scale;
        free covbinv scale;

        call eigen(musqr, v, r);
        free r;

        /* Condition index: largest root over each root. */
        srootmus = sqrt(musqr);
        ci       = 1 / (srootmus / max(srootmus));

        /* Variance decomposition proportions: each row of PHI is    */
        /* divided by its row sum.                                   */
        phi    = (v##2) * diag(musqr##(-1));
        sumphi = phi[, +];
        vdp    = phi # (sumphi##(-1));
        free phi sumphi srootmus v;

        final = t(musqr || ci || nmissing || t(vdp));
        free vdp musqr ci nmissing;

        /* Reverse the column order and name the columns VDP1..VDPk. */
        _ncfinal = ncol(final);
        _nrfinal = nrow(final);
        final2   = j(_nrfinal, _ncfinal, 0);
        _ncfp1   = _ncfinal + 1;
        __vdp    = "VDP";
        do i = 1 to _ncfinal;
          final2[, _ncfp1 - i] = final[, i];
          x = char(i, 3);
          y = compress(concat(__vdp, x));
          if i = 1 then _vdpname = y;
          else _vdpname = _vdpname || y;
        end;
        free final _nrfinal _ncfinal i x y;

        create final2 from final2[rowname=_varnam2 colname=_vdpname];
        append from final2[rowname=_varnam2];
        free _varnam2 _vdpname final2;
      end;
      else do;
        /* Zero or one row: there is no covariance matrix to use.    */
        /* Tell the macro layer to skip the final report.            */
        call symput("__stop", "1");
        print " ";
        print "**********************************************************";
        print "You need to specify the covout option";
        print "in either proc logistic or proc phreg.";
        print "This program will not calculate collinearity diagnostics.";
        print "**********************************************************";
        print " ";
      end;
    quit;

    %if &__stop eq 0 and %sysfunc(exist(final2)) %then %do;
      proc print data=final2 label noobs;
        id _varnam2;
        title8 "Collinearity diagnostics for nonlinear models using";
        title9 "the information matrix: Eigenvalues, Condition Indexes,";
        title10 "and Variance Decomposition Proportions (VDPs)";
        label _varnam2="VARIABLE";
      run;
    %end;
    %else %if &__stop eq 0 %then %do;
      %put WARNING: COLLIN did not create FINAL2 - see the PROC IML messages above.;
    %end;

    /* Remove the work tables.  NOWARN keeps the log quiet when the  */
    /* GENMOD-only tables were never created.                        */
    proc datasets nolist nowarn;
      delete next_1 next_1a next_2 next_3 next_4 next_5;
    run;
    quit;

    /* Do not leave the titles set above active for later steps. */
    title;
  %end;
%mend collin;
Sample execution:
/* Fit the model and write the estimates to SASDSN; COVOUT is the   */
/* option the macro asks for so the covariance rows are included.   */
proc logistic data   = sashelp.heart
              covout
              outest = sasdsn;
   class chol_status sex;
   model status = chol_status ageatstart sex height;
run;

/* Show macro-generated code and macro variable resolution in the log. */
options mprint symbolgen;

/* Run the collinearity diagnostics on the saved covariance matrix. */
%collin(
   covdsn = sasdsn
);
Source: https://etd.library.emory.edu/file/view/pid/emory:939x5/etd/emory:939w1/kharod_dissertation.pdf
What's the name of the macro, for starters? Do you have any more information, such as the original author?
@JonDickens1607 wrote:
I would be most grateful if someone could supply me with a well documented and functionally robust version of this SAS macro.
That may be too big of a request for a forum.
EDIT:
Is this helpful? It seems to be what you're asking for, within the limits of what you've provided.
Do you have a SAS IML license? The macro requires this.
You can check by running:
/* License check: run this and look for a SAS/IML entry in the output. */
Proc setinit;
Run;
Post how you called the macro as well as the macro.
You can only include attachments on the website not via email.
Please place the code in the editor, I dislike downloading attachments.
I am using SAS University Edition 3.5, which includes PROC IML.
/* I think that the name of the original macro was %COLLIN and I have
a copy of this macro but it does not work and I get a variety of errors
*/
Post log with options MPRINT turned on with the error messages.
Previous post included a typing error in the SAS Code calling the Macro Collin.
I have attached the SAS LOG File that is generated when I call the macro
%COLLIN(COVDSN = COVBETAS );
I have also attached the SAS Program Code
Found one that worked. I have no idea whether it is statistically correct, only that it runs without program errors.
Macro definition:
/*----------------------------------------------------------------------
  COLLIN -- collinearity diagnostics for nonlinear models, computed
  from the variance-covariance matrix of the parameter estimates.

  Parameters
    covdsn=    Data set holding the covariance matrix (required).
               LOGISTIC / PHREG: the OUTEST= data set written with the
               COVOUT option.  GENMOD: the covariance table, which must
               carry a ROWNAME column of Prm<n> labels.
    procdr=    GENMOD when COVDSN= comes from GENMOD; any other value
               (including blank) is handled as LOGISTIC / PHREG.
    parminfo=  GENMOD only: data set with PARAMETER (Prm<n>) and EFFECT
               columns, used to translate Prm<n> labels into effect
               names.

  Output
    Prints the input matrix, then builds and prints data set FINAL2
    holding eigenvalues (EIGENVAL), condition indexes (CONDINDX) and
    one row of variance decomposition proportions per parameter.
    Work data sets NEXT_1 .. NEXT_5 are removed at the end.

  Requires PROC IML.

  NOTE(review): the GENMOD branch relies on zero-padded Prm<n> labels
  sorting in parameter order, which only holds for up to 99
  parameters -- confirm before using it on larger models.
----------------------------------------------------------------------*/
%macro collin(covdsn=, procdr=, parminfo=);
  /* Keep the IML-to-macro flag out of the caller's symbol tables. */
  %local __stop;
  %let __stop = 0;

  %if %length(%superq(covdsn)) = 0 %then %do;
    /* No input data set was named.  The original test compared the  */
    /* literal next_5 to blank, so this branch could never be taken. */
    %put;
    %put "*******************************************************";
    %put "When you invoke this macro, you have to specify the name";
    %put "of a SAS data set that contains the variance-covariance";
    %put "matrix from LOGISTIC, PHREG, or GENMOD.";
    %put;
    %put "For more information, see the macro code (comments";
    %put "are included with instructions.";
    %put "*******************************************************";
    %put;
  %end;
  %else %if %upcase(&procdr) = GENMOD and %length(%superq(parminfo)) = 0 %then %do;
    /* A blank PARMINFO= would turn into a bare SET statement and    */
    /* silently read the most recently created data set.             */
    %put ERROR: COLLIN: PARMINFO= is required when PROCDR=GENMOD.;
  %end;
  %else %do;

    /* Drop any FINAL2 left by an earlier call so a failed PROC IML  */
    /* step cannot lead to stale results being printed below.        */
    proc datasets nolist nowarn;
      delete final2;
    run;
    quit;

    %if %upcase(&procdr) = GENMOD %then %do;

      /* Lookup table: Prm<n> label -> effect name.  Prm1-Prm9 are   */
      /* zero-padded so a character sort follows parameter order.    */
      data next_1;
        set &parminfo;
        attrib parnum length=$25 format=$25.;
        parnum = parameter;
        if length(parnum) = 4
           and substr(parnum, 1, 3) = 'Prm'
           and '1' <= substr(parnum, 4, 1) <= '9'
          then parnum = 'Prm0' || substr(parnum, 4, 1);
        rename parnum = parm;
      run;

      proc sort data=next_1;
        by parm;
      run;

      /* Covariance rows, keyed by the same zero-padded label. */
      data next_1a;
        set &covdsn;
        attrib parm length=$25 format=$25.;
        parm = rowname;
        if length(parm) = 4
           and substr(parm, 1, 3) = 'Prm'
           and '1' <= substr(parm, 4, 1) <= '9'
          then parm = 'Prm0' || substr(parm, 4, 1);
      run;

      proc sort data=next_1a;
        by parm;
      run;

      /* Swap each Prm<n> label for its effect name and store it in  */
      /* _NAME_, the column the IML step reads row labels from.      */
      data next_2(drop=effect);
        merge next_1a(in=in1a)
              next_1 (in=in1);
        by parm;
        if in1a;
        parm = effect;
        rename parm = _name_;
      run;

      /* Rows labelled SCALE are excluded from the matrix. */
      data next_3;
        set next_2;
        if _name_ = 'SCALE' then delete;
      run;

      /* Placeholder first row: the IML step always skips row 1. */
      data next_4;
        length _name_ $25;
        _name_ = 'ESTIMATE';
        output;
      run;

      data next_5;
        set next_4
            next_3;
      run;

      proc print data=next_5;
        title "Input dataset--GENMOD";
      run;
    %end;
    %else %do;
      data next_5;
        set &covdsn;
      run;

      proc print data=next_5;
        title "Input dataset--LOGISTIC/PHREG";
      run;
    %end;

    proc iml;
      use next_5;
      read all var {_name_} into _varname;
      _nrvname = nrow(_varname);

      if _nrvname > 1 then do;
        /* Row 1 is skipped: it is the ESTIMATE placeholder in the   */
        /* GENMOD path and presumably the estimates row otherwise.   */
        _varnam2 = _varname[2:_nrvname, ];
        nmissing = j(nrow(_varnam2), 1, .);
        labels   = {"EIGENVAL", "CONDINDX", " "};
        _varnam2 = labels // _varnam2;
        free _varname labels;

        read all var _num_ into varcov[colname=_nvname];
        _nrcvc   = ncol(varcov);
        lastvnam = _nvname[1, _nrcvc];

        /* A trailing _LNLIKE_ column is not part of the matrix. */
        if lastvnam = "_LNLIKE_" then
          varcov2 = varcov[2:_nrvname, 1:_nrcvc-1];
        else
          varcov2 = varcov[2:_nrvname, ];

        /* GENMOD with a repeated-measures design is said to store   */
        /* correlations below the diagonal and covariances above it. */
        /* Copy the upper triangle onto the lower one so the matrix  */
        /* is symmetric; an already symmetric matrix is unchanged.   */
        vc2_c = ncol(varcov2);
        vc2_r = nrow(varcov2);
        do cl = 1 to vc2_c;
          do rw = cl + 1 to vc2_r;
            varcov2[rw, cl] = varcov2[cl, rw];
          end;
        end;
        print varcov2;
        free varcov _nrcvc lastvnam vc2_c vc2_r cl;

        /* Invert the covariance matrix and rescale the result to a  */
        /* unit diagonal before the eigen-decomposition.             */
        covbinv = inv(varcov2);
        scale   = inv(sqrt(diag(covbinv)));
        r       = scale * covbinv * scale;
        free covbinv scale;

        call eigen(musqr, v, r);
        free r;

        /* Condition index: largest root over each root. */
        srootmus = sqrt(musqr);
        ci       = 1 / (srootmus / max(srootmus));

        /* Variance decomposition proportions: each row of PHI is    */
        /* divided by its row sum.                                   */
        phi    = (v##2) * diag(musqr##(-1));
        sumphi = phi[, +];
        vdp    = phi # (sumphi##(-1));
        free phi sumphi srootmus v;

        final = t(musqr || ci || nmissing || t(vdp));
        free vdp musqr ci nmissing;

        /* Reverse the column order and name the columns VDP1..VDPk. */
        _ncfinal = ncol(final);
        _nrfinal = nrow(final);
        final2   = j(_nrfinal, _ncfinal, 0);
        _ncfp1   = _ncfinal + 1;
        __vdp    = "VDP";
        do i = 1 to _ncfinal;
          final2[, _ncfp1 - i] = final[, i];
          x = char(i, 3);
          y = compress(concat(__vdp, x));
          if i = 1 then _vdpname = y;
          else _vdpname = _vdpname || y;
        end;
        free final _nrfinal _ncfinal i x y;

        create final2 from final2[rowname=_varnam2 colname=_vdpname];
        append from final2[rowname=_varnam2];
        free _varnam2 _vdpname final2;
      end;
      else do;
        /* Zero or one row: there is no covariance matrix to use.    */
        /* Tell the macro layer to skip the final report.            */
        call symput("__stop", "1");
        print " ";
        print "**********************************************************";
        print "You need to specify the covout option";
        print "in either proc logistic or proc phreg.";
        print "This program will not calculate collinearity diagnostics.";
        print "**********************************************************";
        print " ";
      end;
    quit;

    %if &__stop eq 0 and %sysfunc(exist(final2)) %then %do;
      proc print data=final2 label noobs;
        id _varnam2;
        title8 "Collinearity diagnostics for nonlinear models using";
        title9 "the information matrix: Eigenvalues, Condition Indexes,";
        title10 "and Variance Decomposition Proportions (VDPs)";
        label _varnam2="VARIABLE";
      run;
    %end;
    %else %if &__stop eq 0 %then %do;
      %put WARNING: COLLIN did not create FINAL2 - see the PROC IML messages above.;
    %end;

    /* Remove the work tables.  NOWARN keeps the log quiet when the  */
    /* GENMOD-only tables were never created.                        */
    proc datasets nolist nowarn;
      delete next_1 next_1a next_2 next_3 next_4 next_5;
    run;
    quit;

    /* Do not leave the titles set above active for later steps. */
    title;
  %end;
%mend collin;
Sample execution:
/* Fit the model and write the estimates to SASDSN; COVOUT is the   */
/* option the macro asks for so the covariance rows are included.   */
proc logistic data   = sashelp.heart
              covout
              outest = sasdsn;
   class chol_status sex;
   model status = chol_status ageatstart sex height;
run;

/* Show macro-generated code and macro variable resolution in the log. */
options mprint symbolgen;

/* Run the collinearity diagnostics on the saved covariance matrix. */
%collin(
   covdsn = sasdsn
);
Source: https://etd.library.emory.edu/file/view/pid/emory:939x5/etd/emory:939w1/kharod_dissertation.pdf
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
ANOVA, or Analysis Of Variance, is used to compare the averages or means of two or more populations to better understand how they differ. Watch this tutorial for more.
Find more tutorials on the SAS Users YouTube channel.