I believe you don't need this metadata table because all this information is also available in dictionary.columns
I also believe that it would be beneficial for downstream processing if you create a collection table that has a stable structure even it it means that some columns might be missing for some years. Creating a stable table structure will avoid issues with downstream code falling over if there is for example a drop statement for a variable that doesn't exist in the sub-set of yearly tables of interest.
In my experience analysts/reporters normally prefer to get the most current version of column attributes (especially Labels and Formats) which is another reason to use all the source table to create the table structure.
It's normally also a good idea to add a variable to such a collection table that provides the information where the data got sourced from.
Below macro will hopefully give you some flexibility to create what you want.
1. Create sample data for testing
/* create sample data */
data
AB2009
AB2010
AB2011(drop=weight height)
AB2013
AB2014
AB2015
AB_All
ABX2023
AB1999AB2023
AB
AB11111
;
set sashelp.class;
run;
data ab2012;
set sashelp.class(rename=(sex=_sex));
attrib SEX length=$3 format=$3. informat=$char3. label="Person's gender";
if _sex='F' then SEX='Female';
else if _sex='M' then SEX='Male';
drop _sex;
run;
data ab2023;
set sashelp.class(drop=height age);
label new_var="That's a New Var";
new_var=1;
run;
2. Macro to create table or view over selected years
%macro createMultiYearTbl(
table_root_name =
,source_lib =
,start_year =
,stop_year =
,target_tbl =
,inds_var = source_table_name
,keep_vars =
,only_vars_from_selected_year = No
,createViewOrTable = Table
);
%if %nrbquote(&start_year)=%nrbquote() %then %let start_year=-5999;
%if %nrbquote(&stop_year)=%nrbquote() %then %let stop_year = 5999;
/* Populate &attrib_list with attrib statement from columns in any table that matches &table_root_name */
%local attrib_list mem_list;
proc sql;
create table work.__allYearsColAttr as
/* create list of column attributes from all tables that match the selection criteria */
select
libname
,memname
,upcase(name) as _upcaseName
,name
,type
,length
,format
,informat
,label
,varnum
,input(scan(memname,-1,,'kd'),year4.) as year
from dictionary.columns
where
libname =%upcase("&source_lib")
and substr(memname,1,max(1,length(memname)-4))=%upcase("&table_root_name")
and not missing(input(scan(memname,-1,,'kd'),year4.))
%if %upcase(&only_vars_from_selected_year) = Y or %upcase(&only_vars_from_selected_year) = YES %then
%do;
and input(scan(memname,-1,,'kd'),year4.) between &start_year and &stop_year
%end;
;
/* populate macro var &source_tbl_list with all selected tables in year range */
select distinct catx('.',libname,memname) into :mem_list separated by ' '
from work.__allYearsColAttr
where year between &start_year and &stop_year
;
/* populate macro var &attrib_list */
select
catx(' '
,name
,ifc(type='char',cats('length=$',length),cats('length=',length))
,ifc(not missing(format),cats('format=',format),' ')
,ifc(not missing(informat),cats('informat=',informat),' ')
,ifc(not missing(label),
ifc(findc(label,'"'),
cats("label='",label,"'"),
cats('label="',label,'"')
),
' '
)
)
into :attrib_list separated by ' '
from
(
/* per column keep attributes from most current occurrence but max length from all occurrences (to avoid string truncation issues) */
select
_upcaseName
,max(length) as length
,name
,type
,format
,informat
,label
,varnum
,year
from work.__allYearsColAttr
group by _upcaseName
having max(year)=year
)
order by year desc, varnum
;
quit;
/* create &target_tbl data */
/* option varlenchk=nowarn to avoid truncation warnings */
/* - the generated attrib statement ensures that no actual string truncation occurs */
%local sv_varlenchk;
%let sv_varlenchk=%sysfunc(getoption(varlenchk,keyword));
options varlenchk=nowarn;
/* housekeeping: clean-up */
proc datasets lib=%scan(work.&target_tbl,-2,.) mt=(data view) nolist nowarn;
delete %scan(&target_tbl,1) __allYearsColAttr;
run;
quit;
/* data step creating target table */
%if %upcase(&createViewOrTable)=VIEW %then
%do;
data &target_tbl /view=&target_tbl;
%end;
%else
%do;
data &target_tbl;
%end;
attrib &attrib_list;
/* avoid uninitialized note */
if 0 then call missing(of _all_);
%if %nrbquote(&inds_var) ne %nrbquote() %then
%do;
length _&inds_var &inds_var $41;
set &mem_list indsname=_&inds_var;
&inds_var=_&inds_var;
%end;
%else
%do;
set &mem_list;
%end;
%if %nrbquote(&keep_vars) ne %nrbquote() %then
%do;
keep &keep_vars;
%end;
run;
/* housekeeping: reset option varlenckh to initial value */
options &sv_varlenchk;
%mend;
3. Macro call and result
%createMultiYearTbl(
table_root_name =ab
,source_lib =WORK
,start_year =2010
,stop_year =2015
,target_tbl =want
/* ,keep_vars =name height */
/* ,only_vars_from_selected_year =N*/
);
proc contents data=want order=varnum;
run;quit;
... View more