Generalizing the program to identify the shareholders by Sector/quarter proved a little bit more difficult for me than i supposed but i think the following program should work, though i haven't checked the resulting dataset thoroughly.
data have;
format Quarter date9.;
input Shareholder $ Company $ Quarter date9. Sector;
cards;
A AA 31MAR2006 2
B AA 31MAR2006 2
C AA 31MAR2006 2
C BB 31MAR2006 2
B BB 31MAR2006 2
Z BB 31MAR2006 2
A CC 31MAR2006 2
B CC 31MAR2006 2
C CC 31MAR2006 2
Z CC 31MAR2006 2
A DD 31MAR2006 3
B DD 31MAR2006 3
C DD 31MAR2006 3
C EE 31MAR2006 3
B EE 31MAR2006 3
Z EE 31MAR2006 3
A FF 31MAR2006 3
B FF 31MAR2006 3
C FF 31MAR2006 3
Z FF 31MAR2006 3
A AA 30JUN2006 2
B AA 30JUN2006 2
C AA 30JUN2006 2
C BB 30JUN2006 2
Y BB 30JUN2006 2
Z BB 30JUN2006 2
A CC 30JUN2006 2
B CC 30JUN2006 2
C CC 30JUN2006 2
Z CC 30JUN2006 2
A DD 30JUN2006 3
B DD 30JUN2006 3
C DD 30JUN2006 3
A EE 30JUN2006 3
B EE 30JUN2006 3
Y EE 30JUN2006 3
A FF 30JUN2006 3
Y FF 30JUN2006 3
C FF 30JUN2006 3
Z FF 30JUN2006 3
;
run;
proc sql noprint;
/* We generate all possible pairs as a Cartesian product */
CREATE TABLE pairs AS
SELECT DISTINCT a.Sector, a.Quarter, a.Company AS Comp1, b.Company AS Comp2
FROM have a, have b
WHERE a.Company<b.Company /* Avoid to include both (AA,BB) and (BB,AA) */
AND b.Quarter=a.Quarter AND b.Sector=a.Sector
;
quit;
/* Create a reference dataset of shareholders with numeric id */
/* that can be used later as an array index */
proc sort data=have(keep=Shareholder) out=Shareholders nodupkey;
by Shareholder;
run;
data Shareholders;
Id=_N_;
set Shareholders;
call symput("nbsh",strip(_N_));
run;
/* We append the shareholders ids to the have dataset */
proc sql noprint;
CREATE TABLE have AS
SELECT a.*, b.ID
FROM have a
LEFT JOIN Shareholders b
ON b.Shareholder=a.Shareholder;
quit;
proc sort data=have out=have_S;
by Sector Quarter Shareholder Company;
run;
data have_S;
set have_S;
by Sector Quarter;
fstq=first.Quarter;
lstq=last.Quarter;
run;
/* For each pairs, we identify Sharheloders that have shares in both companies */
data Common;
set pairs;
/* Array index is the Id of the shareholder */
array SH (&nbsh.) SH1-SH&nbsh.;
do i=1 to nobs;
/* We rename Sector and Quarter to avoid overriding columns in pairs dataset */
set have_S (rename=(Sector=Sect Quarter=Quart)) point=i nobs=nobs;
if fstq then call missing(of SH(*));
if Sector=Sect and Quarter=Quart and (Company=Comp1 or Company=Comp2) then SH(Id)+1;
if lstq then do idx=1 to dim(SH);
/* If the shareholder with Id=idx have shares in both companies from the current pair */
/* we save the Id and output the result. */
if SH(idx)=2 then do;
ShareholderId=idx;
output;
end;
end;
end;
keep Quarter Sector Comp1 Comp2 ShareholderId;
run;
/* We retrieve the shareholer name */
proc sql noprint;
CREATE TABLE Common AS
SELECT a.*, B.Shareholder
FROM Common a
LEFT JOIN Shareholders b
ON b.Id=a.ShareholderId
ORDER BY Comp1, Comp2, Shareholder;
quit;
... View more