Hi,
You seem to be comfortable with PROC SQL, so here is the pure PROC SQL version.
At first, the proposed answer might appear to look like a cartesian product (which it is), but is generated in prudent and limited manner to ensure that the cartesian product is as small as possible.
PROC SQL;
insert into groups
values ('A', 0,1,1,0,0,0 )
values ('B', 0,0,0,0,1,1 )
values ('C', 0,0,0,0,1,1 )
values ('D', 1,0,0,1,0,0 )
values ('E', 1,0,0,1,0,0 )
values ('F', 1,1,0,1,0,0 )
values ('G', 0,1,0,1,0,0 );
QUIT;
/* do a self-join to identify duplicates */
PROC SQL;
create table groups_exact_dupe_members as
select distinct aa.group as base_group
, bb.group as comparison_group
, aa.Mike
, aa.Amy
, aa.Sam
, aa.Nick
, aa.Ann
, aa.Bob
from groups as aa
inner join groups as bb
on aa.group <> bb.group /* self-join, thus no need to join same group */
and aa.group < bb.group /* account for symmetry */
and aa.Mike = bb.Mike
and aa.Amy = bb.Amy
and aa.Sam = bb.Sam
and aa.Nick = bb.Nick
and aa.Ann = bb.Ann
and aa.Bob = bb.Bob;
QUIT;
/* do a self-join to identify groups that have some members in common */
PROC SQL;
/* Note that the inequality check (aa.Mike>0, aa.Amy>0, max(aa.Mike, bb.Mike)>0, etc.)
ensures that we negate zero vs zero comparisons. In short:
0 vs 0 - don't care
0 vs 1 - DO care
1 vs 0 - DO care
1 vs 1 - DO care */
create table groups_common_members as
select distinct aa.group as base_group
, bb.group as comparison_group
, sum(case when aa.Mike > 0 and aa.Mike = bb.Mike then 1 else 0 end,
case when aa.Amy > 0 and aa.Amy = bb.Amy then 1 else 0 end,
case when aa.Sam > 0 and aa.Sam = bb.Sam then 1 else 0 end,
case when aa.Nick > 0 and aa.Nick = bb.Nick then 1 else 0 end,
case when aa.Ann > 0 and aa.Ann = bb.Ann then 1 else 0 end,
case when aa.Bob > 0 and aa.Bob = bb.Bob then 1 else 0 end) AS num_in_common
, sum(case when max(aa.Mike, bb.Mike) > 0 and aa.Mike <> bb.Mike then 1 else 0 end,
case when max(aa.Amy, bb.Amy) > 0 and aa.Amy <> bb.Amy then 1 else 0 end,
case when max(aa.Sam, bb.Sam) > 0 and aa.Sam <> bb.Sam then 1 else 0 end,
case when max(aa.Nick, bb.Nick) > 0 and aa.Nick <> bb.Nick then 1 else 0 end,
case when max(aa.Ann, bb.Ann) > 0 and aa.Ann <> bb.Ann then 1 else 0 end,
case when max(aa.Bob, bb.Bob) > 0 and aa.Bob <> bb.Bob then 1 else 0 end) AS num_not_in_common
, cat("Groups ", aa.group, " and ", bb.group, " have ",
CALCULATED num_in_common, " in common; ",
CALCULATED num_not_in_common, " not in common ") length=100 AS group_common_label
from groups as aa
inner join groups as bb
on aa.group <> bb.group
and aa.group < bb.group
and ((aa.Mike > 0 and aa.Mike = bb.Mike)
or (aa.Amy > 0 and aa.Amy = bb.Amy)
or (aa.Sam > 0 and aa.Sam = bb.Sam)
or (aa.Nick > 0 and aa.Nick = bb.Nick)
or (aa.Ann > 0 and aa.Ann = bb.Ann)
or (aa.Bob > 0 and aa.Bob = bb.Bob))
where calculated num_in_common > 0
order by 1, 3;
QUIT;
... View more