Hello
I'm trying to create a SELECT statement for different tables having different variables - the aim is to check if each table contains duplicates, but I'm struggling to get the right code (this community kindly helped me to find a solution when I was looking for duplicates considering all columns, but now I only need to select some columns ( i.e. keys)).
As I cannot use GROUP BY *, I need to list all the variables in the GROUP BY statement, but I can't find the way to do so.
Below is my attempt - which of course does not work, as the SELECT &Keys and GROUP BY &KEYS does not list all the variables.
/* Create the tables containing the keys for each report */
data METADATA;
input report $ Keys $;
datalines;
CARS MAKE
CARS MODEL
BASEBALL NAME
BASEBALL TEAM
GAS FUEL
CLASS NAME
CLASS SEX
;
run;
/*create the summary (final) table*/
PROC SQL;
CREATE TABLE WORK.DUPL
(
Report char(7),
Tot_Dups Numeric(5)
);
QUIT;
/*macro to loop through all tables*/
%macro makereport(Report=, Keys=);
PROC SQL;
CREATE TABLE POPULATION_AUX AS
SELECT "&Report" AS REPORT , COUNT(A.CT) AS Tot_Dups
FROM (
SELECT &Keys, COUNT(*) AS CT
FROM SASHELP.&Report
GROUP BY &KEYS
HAVING CT>1
)A
GROUP BY A.Report
QUIT;
PROC APPEND BASE=WORK.POPULATION DATA=WORK.POPULATION_AUX FORCE;
RUN;
%mend makereport;
/*create the macros*/
data;
set METADATA;
*builds string to execute macro;
str = catt('%makereport(REPORT=',Report,',Keys=', Keys, ');');
*execute macro;
call execute(str);
run;
Thanks in advance
... View more