Hi.
The following macro will move duplicate datasets within the first library to a second library.
By duplicate datasets I assume, EXACTLY the same layout (variable names, types and lenghts) and EXACTLY the same data.
The code is not simple easy, but not from the other world also. I've placed some comments at every step.
First it will compare layouts... Then, for the ones with the same layout it will run a PROC COMPARE in pairs to check if data is exactly the same... Finally it will move one of them to the second LIB.
It will create some work tables in the WORK library, so obviously do not use WORK as INLIB/OUTIB.
%macro move_dup_dataset_from_lib(INLIB,OUTLIB);
proc contents data=&INLIB.._ALL_ out=WORK._ALL memtype=DATA noprint;
run; * retrieve all contents in INLIB;
proc sql noprint;
create table WORK._DUPS as
select A1.MEMNAME, A1.NAME, A1.TYPE, A1.LENGTH, A1.NOBS, A2.TOTVAR from _ALL as A1
left join
(select MEMNAME, count(*) as TOTVAR from _ALL group by MEMNAME) as A2
on A1.MEMNAME = A2.MEMNAME
where A1.MEMNAME not in
(select distinct MEMNAME from _ALL as B1,
(select NAME, TYPE, LENGTH, NOBS, count(*) as COUNT
from _ALL group by NAME, TYPE, LENGTH having COUNT = 1) as B2
where B1.NAME = B2.NAME and B1.TYPE = B2.TYPE and B1.LENGTH = B2.LENGTH);
quit; * retrieve duplicated layouts (same name, type and length);
proc sort;
by NAME TYPE LENGTH NOBS TOTVAR;
run;
data WORK._PAIRS;
set WORK._DUPS;
retain BASE;
keep BASE DATA;
by NAME TYPE LENGTH NOBS TOTVAR;
DATA=MEMNAME;
if first.TOTVAR + last.TOTVAR lt 2;
if first.TOTVAR then BASE=MEMNAME;
else output;
proc sort noduprecs;
by BASE DATA;
run; * eliminate datasets with layouts/data within other and create pairs;
%let _PAIRS=;
proc sql noprint;
select BASE, DATA into :_BASES separated by ' ', :_DATAS separated by ' ' from WORK._PAIRS;
quit; * load pairs lists into macro variables for cycling;
* cycle through pairs and move if the same;
%do _I=1 %to %sysfunc(countw(&_BASES,%str( )));
%let _BASE=&INLIB..%scan(&_BASES,&_I); * get one base;
%let _DATA=&INLIB..%scan(&_DATAS,&_I); * get one data;
%if %sysfunc(exist(&_DATA)) %then %do;
proc contents data=&_BASE out=WORK._CONTENT memtype=DATA noprint;
run; * get contents;
proc sql noprint;
select NAME into :_NAMES separated by ' ' from WORK._CONTENT;
quit; * get var names;
proc sort data=&_BASE; by &_NAMES; * sort everything;
proc sort data=&_DATA; by &_NAMES; * sort everything;
run; * order;
proc compare base=&_BASE compare=&_DATA
out=WORK._DIF outnoequal noprint;
run; * compare both;
data _null_;
call symput('_DIF','0');
run; * assume they are not equal;
proc sql noprint;
select count(*) eq 0 into :_DIF from WORK._DIF;
quit; * check if any diffs;
%if &_DIF %then %do;
%put *** &_BASE same as &_DATA, moving the latest to &OUTLIB library ***;
proc datasets lib=&INLIB nolist;
copy out=&OUTLIB move;
select %scan(&_DATA,2,%str(.));
quit; * move it;
%end;
%end;
%end;
proc datasets lib=WORK nolist;
delete _ALL _CONTENT _DIF _DUPS _PAIRS;
quit; * delete work tables;
%mend move_dup_dataset_from_lib;
%move_dup_dataset_from_lib(MYLIB,DUPS);
Hope it helps.
Daniel Santos @ www.cgd.pt
... View more