Thank you very much Arthur!!! Exactly what I needed and I learned a lot from your comment, everything is more clear to me now! One last question : lets say the dataset can either have the variable "name", "id" or "dw_id" and I want to my sure that my code counts the number of duplicates even if it does not find the variable "name" (that it then look for "id"; if not found look for dw_id "id_name")... How do I properly code this? Here is my failed attemp. I have put in bold the code I added (not working). Hope you understand what I am trying to do : Your help and time are really appreciated. /* create some test data */ data table1; input name $ age @@ ; cards; John 45 N/A 30 . 15 Carl 25 Carl 30 John 30 ; data table2; input id $ height; cards; Blue 110 N/A 120 . 100 Red Blue 120 Blue 120 ; data table3; input id_name $ height; cards; John 45 N/A 30 . 15 Bob 25 Bob 30 Bob 15 Bob 60 John 30 ; data class; set sashelp.class; if name in ('Carol','James') then call missing(age); if name in ('Jane','John') then output; output; run; /*end of test data creation */ /*create formats that can later be used to identify missing and N/A data*/ /*format nn is for numeric values*/ /*format $c is for character values*/ proc format; value nn low-high='1' other='3'; value $c 'N/A'='2' ' '='3' other='1'; run; %macro tables(stuff); /*the macro won't work unless the file work.tables doesn't exist*/ /*so the proc datasets is used to delete it*/ proc datasets lib=work nolist; delete tables; quit; %let j=1; /*loop through all of the identified datasets*/ %do %while(%scan(&stuff,&j,/) ne); %let dset=%scan(&stuff,&j,/); if upcase(name) eq "NAME" then variable="name"; if upcase(id) eq "ID" then variable="ID"; if upcase(id_name) eq "ID_NAME" then variable="ID_name"; /*run proc freq for each dataset, storing the output in work.temp*/ ods output onewayfreqs=temp (keep=table f_: freq:); proc freq data=&dset; tables _all_ / missing; format _numeric_ nn. _character_ $c.; run; ods output close; /*use proc sql to count the number of duplicates on variable name*/ /*placing the result in a macro variable called &dups.*/ proc sql; select sum(num) into :dups from ( select COUNT(variable) as num FROM &dset. GROUP BY variable HAVING ( COUNT(variable) > 1 ) ) ; quit; /*restructure work.temp to create the desired columns*/ /*and incorporate &dups. as a column called duplicates*/ data temp (keep=table name not_: missing duplicates total); length table name $32; do until(last.table); set temp; by table notsorted; array t(*) f_:; select(t(_n_)); when('1') not_missing = frequency; when('2') not_applicable = frequency; when('3') missing = frequency; end; end; total = sum(of not_:, missing); name = scan(table,2); table = "&dset"; if upcase(name) eq "NAME" then duplicates=&dups.; if upcase(id) eq "ID" then duplicates=&dups.; if upcase(id_name) eq "ID_NAME" then duplicates=&dups.; run; /*append work.temp to a dataset called tables*/ proc append base=tables data=temp; run; %let j=%eval(&j+1); %end; /*cleanup by deleting work.temp .. the final desired dataset*/ /*will be work.tables*/ proc datasets lib=work nolist; delete temp; quit; %mend; ods listing close; %tables(table1/table2/table3); ods listing;
... View more