Hi. Rick.
I think SQL can get what you want. But Due to your large number of variables, you need another macro to walk through all these variables.
NOTE: This way create about 3000 datasets to hold the similarity ,so it is less efficient than IML.
data a; input investor $ companyID $ wt;
cards;
A IBM 0.50
A MSFT 0.40
A GOOG 0.10
A GRPN 0.00
B IBM 0.40
B MSFT 0.60
B GOOG 0.00
B GRPN 0.00
C IBM 0.50
C MSFT 0.00
C GOOG 0.00
C GRPN 0.50
;
run;
proc sort data=a; by companyID;run;
proc transpose data=a out=temp(drop=_name_);
by companyid;
id investor;
var wt;
run;
options nomprint nomlogic nosymbolgen;
%macro across;
proc sql ;
select distinct investor from a;
select distinct investor into :investor1 - :investor&sqlobs. from a;
%let obs=&sqlobs ;
%do i=1 %to %eval(&obs. - 1);
create table _&i as
select
%do j=%eval(&i+1) %to &obs.;
sum(&&investor&i*&&investor&j)/(sqrt(uss(&&investor&i))*sqrt(uss(&&investor&j))) as &&investor&i..&&investor&j
%if &j ne &obs. %then %do;,%end;
%end;
from temp;
%end;
quit;
%do k=1 %to %eval(&obs. - 1);
data __&k;
length name $ 40;
set _&k;
array _x{*} _numeric_;
do i=1 to dim(_x);
name=vname(_x{i});value=_x{i};output;
end;
keep name value;
run;
%end;
data want; set __:; run;
%mend across;
%across
Ksharp