You could create hash values over the variables of interest. Rows with identical values for those variables will then get the same hash value.
/* Sample data: one row per user account.                                   */
/* firstName/lastName use the default character length (8) from list input; */
/* emailAddress is read with $50. and enrollmentDate with yymmdd10. via the */
/* colon modifier, so both still use blank-delimited list input.            */
data have;
  input userid firstName $ lastName $ zip emailAddress:$50. enrollmentDate:yymmdd10.;
  /* The FORMAT statement must name the variable exactly as it is read above; */
  /* a misspelled name would leave enrollmentDate unformatted and create an   */
  /* extra, always-missing variable in the dataset.                           */
  format enrollmentDate date9.;
datalines;
1 First Last 9988787 flast@provider.domain 2011-01-01
2 First Last 9999888 flast@provider.domain 2013-01-01
3 First Last 9999888 firstlast@provider.domain 2014-01-01
4 First Last 9911111 last999@provider2.domain 2016-01-01
;
run;
/* Add one MD5 digest per duplicate-detection rule.                        */
/* Rows that agree on every variable fed into a rule get the same digest,  */
/* so duplicates can later be found by grouping on a single column.        */
data want;
  set have;

  /* An MD5 digest rendered as hex text is 32 characters long. */
  length case_1 $32
         case_2 $32;

  /* Rule 1: same first name, last name and zip. */
  case_1 = hashing('md5', catx('|', firstName, lastName, zip));

  /* Rule 2: same first name, last name and e-mail address. */
  case_2 = hashing('md5', catx('|', firstName, lastName, emailAddress));
run;
/* Demo: list every row whose case_1 digest occurs more than once.        */
/* Selecting * together with count(*) makes PROC SQL remerge the group    */
/* count back onto each detail row, so n_dups is shown next to each row.  */
proc sql;
  select *,
         count(*) as n_dups
    from want
    group by case_1
    having calculated n_dups > 1
  ;
quit;
... View more