Ernesto
Duke is of course right — the code I provided counted only duplicate IDs.
The code below now shows and counts duplicate records.
What confuses me:
ID is defined as primary key and should therefore be unique.
Just removing all duplicate records won't solve every possible issue with duplicate IDs; there could even be different sets of duplicate records per id, as
the sample data illustrates.
data mydataset;
    /* Build sample data: two id values, each written 20 times with five
       random 0/1 flags, so the file is guaranteed to contain duplicate
       ids and (with high probability) fully duplicate records. */
    length id 8;
    array var {5} 8;
    keep id var1-var5;
    do id=1 to 2;
        do _rep=1 to 20;
            do _k=1 to dim(var);
                /* round(ranuni(1)) yields a random 0 or 1 */
                var{_k}=round(ranuni(1));
            end;
            output;
        end;
    end;
run;
/* variant1: count before duplicates got removed */
proc sql;
    /* Build a comma-separated list of all columns of MYDATASET.
       Note: DICTIONARY.COLUMNS stores LIBNAME/MEMNAME in uppercase. */
    select name into :col_list separated by ','
        from dictionary.columns
        where libname='WORK' and memname='MYDATASET';
    /* Report each fully duplicated record with its occurrence count;
       grouping by every column detects duplicate records, not just
       duplicate ids. */
    select count(*) as Duplicates, *
        from mydataset
        group by &col_list
        having count(*)>1;
quit;
/* variant2: count after duplicates got removed */
/* Split the data: Unique_mydataset keeps one row per distinct record
   (BY _all_ makes every column part of the key), Dup_mydataset receives
   the discarded duplicate occurrences. */
proc sort
        data=mydataset
        out=Unique_mydataset
        dupout=Dup_mydataset
        nodupkey
        equals;
    by _all_;
run;
proc sql;
    /* BUG FIX: DICTIONARY.COLUMNS stores MEMNAME in uppercase, so the
       original comparison memname='Dup_mydataset' matched no rows and
       &col_list silently kept its value from variant1 (which only worked
       because both tables happen to share the same columns). */
    select name into :col_list separated by ','
        from dictionary.columns
        where libname='WORK' and memname='DUP_MYDATASET';
    /* DUPOUT= holds every duplicate occurrence except the one kept in
       the unique output, hence +1 to report the original count. */
    select count(*)+1 as Duplicates, *
        from Dup_mydataset
        group by &col_list;
quit;
HTH
Patrick