I agree with Paige's answer, with one warning: it won't work if the upper-triangular elements are missing instead of zero. My code works with missing values, but it doesn't work with negative correlations, which are very common. I experimented with some data this morning to understand when each approach works and when it fails. My complete code is attached below. I ran two examples: the first uses a data set produced by PROC CORR, and the second creates the matrix directly in the PROC IML step.
/* Issue display-manager commands that clear the Log, Output, and ODS Results windows. */
dm 'log;clear;output;clear;odsresults;clear; ';

/* Run PROC CORR on SASHELP.CARS, writing its output data set to WORK.OUT1.
   NOPRINT suppresses the printed report. */
proc corr data=sashelp.cars
          out=out1
          noprint;
run;
/* VARCNT: number of numeric columns that DICTIONARY.COLUMNS reports for WORK.OUT1. */
proc sql;
  select count(name) as N
    into :varcnt
    from dictionary.columns
    where libname='WORK'
      and memname='OUT1'
      and type="num";
quit;

/* VARLIST: the same numeric column names, each wrapped in double quotes and
   joined with commas, so the list can be pasted into an IML character
   matrix literal. */
proc sql;
  select '"'||strip(name)||'"'
    into :varlist separated by ','
    from dictionary.columns
    where libname='WORK'
      and memname='OUT1'
      and type="num";
quit;
/* Keep only the _TYPE_='CORR' rows of OUT1 and blank out every cell above the
   diagonal. The result is a LOWER-triangular layout (diagonal included) whose
   upper triangle is missing.
   Because of the WHERE filter, _N_ counts only the rows that are kept, so on
   row _N_ the columns _N_+1 .. dim(corrs) are the ones above the diagonal. */
data out1(drop=_type_ j);
  set out1;
  where _type_='CORR';

  /* (*) sizes the array from the numeric variables present at this point;
     J is created afterwards, so it is not an element of the array. */
  array corrs(*) _numeric_;

  do j = _n_ + 1 to dim(corrs);
    corrs(j) = .;
  end;
run;

/* Print the data set to confirm the lower-triangular layout. */
proc print data=out1;
run;
/* Example 1: rebuild a full symmetric correlation matrix from the
   lower-triangular data set OUT1 and save it as WORK.CORRMAT.

   All comments are block comments on purpose: a statement-style comment
   (asterisk ... semicolon) must not contain an unmatched quotation mark, and
   apostrophes in such comments can pair up across statements and swallow the
   executable code between them. */
proc iml;
  /* Column labels taken from the macro variable VARLIST. */
  cnames = {&varlist.};

  /* Read the numeric columns of OUT1 into a matrix; _NAME_ is dropped. */
  use out1(drop=_name_);
  read all into dist;
  close out1;

  /* Replace missing cells (the upper triangle) with zero. LOC returns an
     empty matrix when nothing matches, and an empty subscript is an error,
     so only assign when something was found. */
  missIdx = loc(dist = .);
  if ncol(missIdx) > 0 then dist[missIdx] = 0;
  print "Print matrix after changing missing values to zero", dist;

  /* Method 1 (the other reply): lower + transpose counts the diagonal twice,
     so subtract one copy of the diagonal instead of hunting for the value 2. */
  new_dist = dist + t(dist) - diag(dist);

  /* Method 2 (my suggestion): elementwise maximum of the lower and upper
     triangles. Kept for comparison only: once the missing cells are zero,
     max(r, 0) returns 0 for every negative correlation r, so R is wrong
     whenever the matrix contains negative correlations. */
  R = dist <> t(dist);

  print "Correlation Matrices", new_dist[colname=cnames], R[colname=cnames];

  /* Save the correct full matrix (method 1) as a data set. */
  create CorrMat from new_dist[colname=cnames];
  append from new_dist;
  show contents;
  close CorrMat;
quit;

proc print data=CorrMat;
run;
/* Example 2: the same conversion, with the lower-triangular matrix typed
   directly into PROC IML (values taken from the cars example) and the result
   saved as WORK.CORRMAT2.

   All comments are block comments on purpose: a statement-style comment
   (asterisk ... semicolon) must not contain an unmatched quotation mark, and
   apostrophes in such comments can pair up across statements and swallow the
   executable code between them. */
proc iml;
  /* Lower-triangular correlation matrix; the upper triangle is missing (.). */
  dist = {
     1.0000000  .          .          .          .          .          .          .          .          .         ,
     0.9991316  1.0000000  .          .          .          .          .          .          .          .         ,
     0.571753   0.564498   1.0000000  .          .          .          .          .          .          .         ,
     0.6497419  0.6452261  0.9080023  1.0000000  .          .          .          .          .          .         ,
     0.826945   0.8237465  0.7874349  0.8103406  1.0000000  .          .          .          .          .         ,
    -0.47502   -0.470442  -0.709471  -0.684402  -0.676699   1.0000000  .          .          .          .         ,
    -0.439622  -0.434585  -0.717302  -0.6761    -0.647195   0.9410205  1.0000000  .          .          .         ,
     0.4484264  0.4423322  0.8078667  0.7422087  0.6307958 -0.737966  -0.790989   1.0000000  .          .         ,
     0.1520001  0.1483275  0.636517   0.5467305  0.3873978 -0.507284  -0.524661   0.7607028  1.0000000  .         ,
     0.1720368  0.1665864  0.6374482  0.5477827  0.3815539 -0.501526  -0.466092   0.6900207  0.8891947  1.0000000
  };
  print "Print original matrix as it was entered", dist;

  /* Replace missing cells (the upper triangle) with zero. LOC returns an
     empty matrix when nothing matches, and an empty subscript is an error,
     so only assign when something was found. */
  missIdx = loc(dist = .);
  if ncol(missIdx) > 0 then dist[missIdx] = 0;
  print "Print matrix after changing missing values to zero", dist;

  /* Method 1 (the other reply): lower + transpose counts the diagonal twice,
     so subtract one copy of the diagonal instead of hunting for the value 2. */
  new_dist = dist + t(dist) - diag(dist);

  /* Method 2 (my suggestion): elementwise maximum of the lower and upper
     triangles. Kept for comparison only: this matrix contains negative
     correlations, and max(r, 0) turns each of them into 0, so R does NOT
     match new_dist here. */
  R = dist <> t(dist);

  /* Build generic column names X1, X2, ..., Xp, where p is the number of
     columns (index-operator idiom seen on Rick Wicklin's blog). */
  p = ncol(dist);
  LastName = "X" + strip(char(p,4));
  VarNames = "X1":LastName;

  print "Correlation Matrices after updates", new_dist[colname=VarNames], R[colname=VarNames];

  /* Save the correct full matrix (method 1) as a data set. */
  create CorrMat2 from new_dist[colname=VarNames];
  append from new_dist;
  show contents;
  close CorrMat2;
quit;

proc print data=CorrMat2;
run;
... View more