I HAVE the following data:
DirectorID | CompanyID | AwardYear | DegreeType |
1928993 | 62323 | 1998 | Undegrad |
1798822 | 62360 | 1998 | Undegrad |
2384193 | 467713 | 2003 | Other |
1769649 | 467713 | 2003 | Other |
1777732 | 1519459 | 2002 | Undegrad |
1627886 | 1519459 | 2003 | Undegrad |
1323941 | 1519459 | 2004 | Undergrad |
1839100 | 60212 | 2008 | Grad |
1706497 | 60212 | 2008 | Undergrad |
I want to count the number of unique rows (Matches) where a DirectorID matches with another DirectorID on ALL of the following criteria: (1) CompanyId, (2) within +/- one year of AwardYear, and (3) DegreeType.
The following output is what I WANT:
DirectorID | CompanyID | AwardYear | DegreeType | Matches |
1928993 | 62323 | 1998 | Undegrad | 0 |
1798822 | 62360 | 1998 | Undegrad | 0 |
2384193 | 467713 | 2003 | Other | 1 |
1769649 | 467713 | 2003 | Other | 1 |
1777732 | 1519459 | 2002 | Undegrad | 1 |
1627886 | 1519459 | 2003 | Undegrad | 2 |
1323941 | 1519459 | 2004 | Undergrad | 1 |
1839100 | 60212 | 2008 | Grad | 0 |
1706497 | 60212 | 2008 | Undergrad | 0 |
What code could be used to create the "Matches" variable?
Thank you!!
If you do not have a big table ,try PROC SQL.
data have;
infile cards expandtabs truncover;
input DirectorID CompanyID AwardYear DegreeType $;
cards;
1928993 62323 1998 Undegrad
1798822 62360 1998 Undegrad
2384193 467713 2003 Other
1769649 467713 2003 Other
1777732 1519459 2002 Undegrad
1627886 1519459 2003 Undegrad
1323941 1519459 2004 Undegrad
1839100 60212 2008 Grad
1706497 60212 2008 Undergrad
;
proc sql;
create table want as
select a.*,count(b.DirectorID) as Matches
from have as a left join have as b
on a.CompanyID=b.CompanyID and a.AwardYear between b.AwardYear-1 and b.AwardYear+1
and a.DegreeType=b.DegreeType and a.DirectorID ne b.DirectorID
group by a.DirectorID,a.CompanyID,a.AwardYear,a.DegreeType;
quit;
If you do not have a big table ,try PROC SQL.
data have;
infile cards expandtabs truncover;
input DirectorID CompanyID AwardYear DegreeType $;
cards;
1928993 62323 1998 Undegrad
1798822 62360 1998 Undegrad
2384193 467713 2003 Other
1769649 467713 2003 Other
1777732 1519459 2002 Undegrad
1627886 1519459 2003 Undegrad
1323941 1519459 2004 Undegrad
1839100 60212 2008 Grad
1706497 60212 2008 Undergrad
;
proc sql;
create table want as
select a.*,count(b.DirectorID) as Matches
from have as a left join have as b
on a.CompanyID=b.CompanyID and a.AwardYear between b.AwardYear-1 and b.AwardYear+1
and a.DegreeType=b.DegreeType and a.DirectorID ne b.DirectorID
group by a.DirectorID,a.CompanyID,a.AwardYear,a.DegreeType;
quit;
If your computer has the big memory ,try hash table :
data have;
infile cards expandtabs truncover;
input DirectorID CompanyID AwardYear DegreeType $;
cards;
1928993 62323 1998 Undegrad
1798822 62360 1998 Undegrad
2384193 467713 2003 Other
1769649 467713 2003 Other
1777732 1519459 2002 Undegrad
1627886 1519459 2003 Undegrad
1323941 1519459 2004 Undegrad
1839100 60212 2008 Grad
1706497 60212 2008 Undergrad
;
data want;
if _n_=1 then do;
if 0 then set have(rename=(DirectorID=_DirectorID));
declare hash h(dataset:' have(rename=(DirectorID=_DirectorID))',multidata:'y',hashexp:20);
h.definekey('CompanyID','AwardYear','DegreeType');
h.definedata('_DirectorID');
h.definedone();
end;
set have;
Matches=0;
do i=AwardYear-1 to AwardYear+1;
rc=h.find(key:CompanyID,key:i,key:DegreeType);
do while(rc=0);
if DirectorID ne _DirectorID then Matches+1;
rc=h.find_next(key:CompanyID,key:i,key:DegreeType);
end;
end;
drop i rc _DirectorID;
run;
Your data appear to be already grouped by companyid and degreetype. If so, then:
data have;
infile cards expandtabs truncover;
input DirectorID CompanyID AwardYear DegreeType :$9.;
cards;
1928993 62323 1998 Undergrad
1798822 62360 1998 Undergrad
2384193 467713 2003 Other
1769649 467713 2003 Other
1777732 1519459 2002 Undergrad
1627886 1519459 2003 Undergrad
1323941 1519459 2004 Undergrad
1839100 60212 2008 Grad
1706497 60212 2008 Undergrad
;
data want;
array awdcount{1990:2010} _temporary_;
call missing(of awdcount{*});
do until (last.degreetype);
set have;
by companyid degreetype notsorted;
awdcount{awardyear}+1;
end;
do until (last.degreetype);
set have;
by companyid degreetype notsorted;
nmatch=sum(awdcount{awardyear-1},awdcount{awardyear},awdcount{awardyear+1})-1;
output;
end;
run;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 16. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.