Sort your data by companyid/datestartrole, then you can discover the matches one company at a time:
data have;
input DIRECTORID COMPANYID DATESTARTROLE :date9. DATEENDROLE :date9.;
format DATESTARTROLE DATEENDROLE :date9.;
cards;
1 1001 01JUL1998 05MAR2021
2 1001 01APR2020 05MAR2021
3 1001 01JUL2010 01MAY2011
1 1002 06MAR2021 07MAR2024
2 1002 06JUL2023 05MAY2024
4 1002 01APR1988 15JUN2023
;
proc sort data=have out=need;
by companyid datestartrole;
run;
%let max_size=50; /*Max possible N of concurrent directors*/
data _null_;
array curr_dirs {&max_size} _temporary_;
array exdates {&max_size} _temporary_;
array exdirs {&max_size} _temporary_;
if _n_=1 then do;
declare hash overlaps();
overlaps.definekey('dir1','dir2');
overlaps.definedone();
declare hash hwant(ordered:'a');
hwant.definekey('directorid');
hwant.definedata('directorid','matches');
hwant.definedone();
end;
set need end=end_of_need;
by companyid datestartrole;
if first.companyid then call missing(of curr_dirs{*},of exdates{*},of exdirs{*});
/* First obs for this dir? Make a new hwant entry with matches=0*/
if hwant.find(key:directorid)^=0 then hwant.add(key:directorid,data:directorid,data:0);
/* Remove dirs from CURR_DIRS with exit dates prior to datestartrole*/
min_exdate=min(of exdates{*},'31dec9999'd);
do while (min_exdate<datestartrole);
e=whichn(min(of exdates{*}),of exdates{*});
d=whichn(exdirs{e},of curr_dirs{*});
call missing (curr_dirs{d},exdates{e},exdirs{e});
min_exdate=min(of exdates{*},'31dec9999'd);
end;
/*Add the new director's exitdate to upcoming exit list */
do e=1 by 1 until(exdates{e}=.); end; /*Identify leftmost missing exdate */
exdates{e}=dateendrole;
exdirs{e}=directorid;
/* Add this director to curr_dirs array*/
do d=1 by 1 until(curr_dirs{d}=.); end;
curr_dirs{d}=directorid;
do d=1 to n(of curr_dirs{*});
dir1=directorid;
dir2=smallest(d,of curr_dirs{*});
if dir2=dir1 then continue;
if overlaps.find()=0 then continue; /*Skip below If this overlap already extablished*/
overlaps.add(key:dir1,key:dir2,data:dir1,data:dir2); /* dir1/dir2 */
overlaps.add(key:dir2,key:dir1,data:dir2,data:dir1); /* dir2/dir1 */
do directorid=dir2,dir1;
hwant.find();
matches+1;
hwant.replace();
end;
end;
if end_of_need then hwant.output(dataset:'want');
run;
One company at a time, this program dynamically updates arrays listing the current directors, and the upcoming director exit dates.
It uses those arrays to update two hashes over all companies: (1) OVERLAPS which records each pair of directors having an overlap, and (2) HWANT, which keeps track of the count of overlaps for each dir. If a pair of directors overlaps more than once, it counts as only one in the MATHES variable.
... View more