Hi, I hope you realize that you will have to compare every pair of datasets with each other. For 1000 datasets that means C(1000,2) = 1000!/(2!(1000-2)!) = 499500 comparisons, so I am not sure how long it will take. Try this:
%* Create test data;
/* Build five nested test datasets keyed by uid:                      */
/*   a holds uid 1-10, b holds 2-10, c holds 3-10, d holds 4-10 and   */
/*   e holds 5-10, so each dataset is a subset of the one before it.  */
data a b c d e;
  do uid = 1 to 10;
    /* every uid goes to a; each later dataset starts one uid higher */
    output a;
    if uid >= 2 then output b;
    if uid >= 3 then output c;
    if uid >= 4 then output d;
    if uid >= 5 then output e;
  end;
run;
%* Determine Datasets to check;
PROC SQL;
/* Build the work list of dataset pairs to compare.                       */
/* v_all    : temporary view listing every dataset in the chosen library  */
/*            together with its observation count.                        */
/* __COMPARISON : one row per unordered pair (current sorts before next); */
/*            similarity starts out missing and is filled in later.       */
CREATE VIEW v_all AS
SELECT memname, nobs
FROM sashelp.vtable
WHERE libname EQ 'WORK' /*adapt*/
  /* keep real datasets only: views have no usable nobs and would */
  /* otherwise be paired up and scanned like tables               */
  AND memtype EQ 'DATA'
  /* skip the helper objects themselves so a rerun stays clean */
  AND memname NOT IN ('__COMPARISON', 'V_ALL')
;
CREATE TABLE __COMPARISON AS
SELECT a.memname AS current,
       a.nobs    AS currentnobs,
       b.memname AS next,
       .         AS similarity FORMAT=percent7.1
FROM v_all AS a
INNER JOIN v_all AS b
  /* strict inequality yields each pair once and never a self pair */
  ON b.memname > a.memname
ORDER BY a.memname, b.memname
;
DROP VIEW v_all;
QUIT;
%* Prepare comparison macro;
%MACRO compare(uniqueIDVar,current,next);
/* Fill in the similarity for one row of __COMPARISON.                    */
/*                                                                        */
/* Parameters:                                                            */
/*   uniqueIDVar - name of the key variable present in both datasets      */
/*   current     - name of the first dataset (the denominator side)       */
/*   next        - name of the second dataset                             */
/*                                                                        */
/* similarity = (rows of current whose key also occurs in next)           */
/*              divided by currentnobs, the stored row count of current.  */
/* The measure is one-directional: it is relative to current only.        */
/* Both names are used as one-level dataset names and are matched against */
/* the current and next columns exactly as passed in.                     */
PROC SQL;
update __COMPARISON SET similarity=(
  (select count(*)
   from &current.
   where &uniqueIDVar. in (select &uniqueIDVar. from &next.)
  )/currentnobs
)
where current eq "&current." and next eq "&next."
;
QUIT;
%MEND compare;
%* Run comparisons;
/* Queue one compare call per row of __COMPARISON. The nrstr wrapper   */
/* inside the single-quoted text delays macro execution until after    */
/* this step ends, so the generated PROC SQL steps run one by one.     */
data _null_;
  set __COMPARISON;
  length macro_call $ 200;
  /* cats trims each piece, giving the same text as strip plus concatenation */
  macro_call = cats('%nrstr(%compare(uid,', current, ',', next, '))');
  call execute(macro_call);
run;
Cheers,
... View more