Hi,
I am tyring to merge 30 excel files which have 600 hundreds tables in each file.
My issue is that every table has its own headers, so if I simply merge them then all the headers will be gathered together at some place because they are considered as body of tables.
I am pretty sure there's an efficient way to merge table by table.
I would like to illustrate it with two simplified excel files to make sure what I hope to do:
<Excel file 1> <Excel file2>
Name FileYr Table year V1 V2 V3 V4 Name FileYr Table year V1 V2 V3 V13
AB 2000 IS 1997 1 15 3 12 AB 2001 IS 1998 4 3 14
AB 2000 IS 1998 4 3 14 2 AB 2001 IS 1999 7 12 16
AB 2000 IS 1999 7 12 16 4 AB 2001 IS 2000 17 11 9 10
Name FileYr Table year V5 V6 V7 V8 Name FileYr Table year V5 V6 V7 V8 V14
CC 2000 AS 1997 6 14 6 2 CC 2001 AS 1998 12 7 9 1
CC 2000 AS 1998 12 7 9 1 CC 2001 AS 1999 5 21 5 11 22
Name FileYr Table year V9 V10 V11 V12 Name FileYr Table year V9 V10 V12
CC 2000 LB 1997 1 18 17 4 CC 2001 LB 1998 10 7 5
CC 2000 LB 1998 10 7 42 5 CC 2001 LB 1999 71 60 40
<Goal Data Set>
Name FileYr Table year V1 V2 V3 V4 V13
AB 2000 IS 1997 1 15 3 12
AB 2000 IS 1998 4 3 14 2
AB 2000 IS 1999 7 12 16 4
AB 2001 IS 2000 17 11 9 10
Name FileYr Table year V5 V6 V7 V8 V14
CC 2000 AS 1997 6 14 6 2
CC 2000 AS 1998 12 7 9 1
CC 2001 AS 1999 5 21 5 11 22
Name FileYr Table year V9 V10 V11 V12
CC 2000 LB 1997 1 18 17 4
CC 2000 LB 1998 10 7 42 5
CC 2001 LB 1999 71 60 40
I have been struggling with this work and now looking for any comments or advice.
Can anyone help me to resolve this issue in a right way?
Any comments will be greatly appreciated.
Thank you.
Minsoo
Minsoo,
No guarantees, but the following should at least give you enough of a lead to solve your problem. It assumes you are on some 32-bit version of Windows and are trying to upload 32-bit xls file. If not, you have to make slight modifications for the directory search and the proc import engine call.
It also assumes that all of your excel files are in the same directory.
You can't get a SAS file structured as your desired output. A column can only contain one variable name.
Try the following code and see if it comes close to solving your current problem and results in something close to what you want.
I should point out that the first part of the code was stolen (though slightly revised) from:
http://www2.sas.com/proceedings/sugi31/034-31.pdf
options noxwait;
%macro ReadXls (dir=);
%sysexec cd &dir;
%sysexec dir *.xls /b/o:n > flist.txt;
data master;
length _excelfilename $100;
length _sheetname $32;
length f1-f10 $20;
stop;
run;
data _indexfile;
length filen $200;
infile "&dir.\flist.txt";
input filen $;
run;
proc sql noprint;
select count(filen) into :cntfile
from _indexfile;
%if &cntfile>=1 %then %do;
select filen into :filen1-:filen%left(&cntfile)
from _indexfile;
%end;
quit;
%do i=1 %to &cntfile;
libname excellib excel "&dir.\&&filen&i";
proc sql noprint;
create table sheetname as
select tranwrd(memname, "''", "'") as sheetname
from sashelp.vstabvw
where libname="EXCELLIB";
select count(DISTINCT sheetname) into :cnt_sht
from sheetname;
select DISTINCT sheetname into :sheet1 - :sheet%left(&cnt_sht)
from sheetname;
quit;
%do j=1 %to &cnt_sht;
proc import datafile="&dir.\&&filen&i"
out=sheet&j replace;
sheet="&&sheet&j";
getnames=no;
mixed=yes;
run;
data sheet&j;
length _excelfilename $100 _sheetname $32;
set sheet&j;
_excelfilename="&&filen&i";
_sheetname="&&sheet&j";
run;
proc append base=master data=sheet&j force;
run;
%end;
libname excellib clear;
%end;
%mend ReadXls;
%readxls (dir=c:\art)
data want (drop=_: f: varn:);
set master;
array varnm(6);
array vars(14);
retain varnm;
if compress(f1,,'kf') eq "Name" then do;
call missing(of varnm(*));
if compress(f5,,'kd') ne "" then varnm(1)=input(compress(f5,,'kd'),best12.);
if compress(f6,,'kd') ne "" then varnm(2)=input(compress(f6,,'kd'),best12.);
if compress(f7,,'kd') ne "" then varnm(3)=input(compress(f7,,'kd'),best12.);
if compress(f8,,'kd') ne "" then varnm(4)=input(compress(f8,,'kd'),best12.);
if compress(f9,,'kd') ne "" then varnm(5)=input(compress(f9,,'kd'),best12.);
if compress(f10,,'kd') ne "" then varnm(6)=input(compress(f10,,'kd'),best12.);
end;
else if compress(f1,,'kf') ne "" then do;
Name=compress(f1,,'kf');
FileYr=input(compress(f2,,'kd'),best12.);
Table=compress(f3,,'kf');
Year=input(compress(f4,,'kd'),best12.);
if compress(f5,,'kd') ne "" then vars(varnm(1))
=input(compress(f5,,'kd'),best12.);
if compress(f6,,'kd') ne "" then vars(varnm(2))
=input(compress(f6,,'kd'),best12.);
if compress(f7,,'kd') ne "" then vars(varnm(3))
=input(compress(f7,,'kd'),best12.);
if compress(f8,,'kd') ne "" then vars(varnm(4))
=input(compress(f8,,'kd'),best12.);
if compress(f9,,'kd') ne "" then vars(varnm(5))
=input(compress(f9,,'kd'),best12.);
if compress(f10,,'kd') ne "" then vars(varnm(6))
=input(compress(f10,,'kd'),best12.);
output;
end;
run;
HTH,
Art
Minsoo,
No guarantees, but the following should at least give you enough of a lead to solve your problem. It assumes you are on some 32-bit version of Windows and are trying to upload 32-bit xls file. If not, you have to make slight modifications for the directory search and the proc import engine call.
It also assumes that all of your excel files are in the same directory.
You can't get a SAS file structured as your desired output. A column can only contain one variable name.
Try the following code and see if it comes close to solving your current problem and results in something close to what you want.
I should point out that the first part of the code was stolen (though slightly revised) from:
http://www2.sas.com/proceedings/sugi31/034-31.pdf
options noxwait;
%macro ReadXls (dir=);
%sysexec cd &dir;
%sysexec dir *.xls /b/o:n > flist.txt;
data master;
length _excelfilename $100;
length _sheetname $32;
length f1-f10 $20;
stop;
run;
data _indexfile;
length filen $200;
infile "&dir.\flist.txt";
input filen $;
run;
proc sql noprint;
select count(filen) into :cntfile
from _indexfile;
%if &cntfile>=1 %then %do;
select filen into :filen1-:filen%left(&cntfile)
from _indexfile;
%end;
quit;
%do i=1 %to &cntfile;
libname excellib excel "&dir.\&&filen&i";
proc sql noprint;
create table sheetname as
select tranwrd(memname, "''", "'") as sheetname
from sashelp.vstabvw
where libname="EXCELLIB";
select count(DISTINCT sheetname) into :cnt_sht
from sheetname;
select DISTINCT sheetname into :sheet1 - :sheet%left(&cnt_sht)
from sheetname;
quit;
%do j=1 %to &cnt_sht;
proc import datafile="&dir.\&&filen&i"
out=sheet&j replace;
sheet="&&sheet&j";
getnames=no;
mixed=yes;
run;
data sheet&j;
length _excelfilename $100 _sheetname $32;
set sheet&j;
_excelfilename="&&filen&i";
_sheetname="&&sheet&j";
run;
proc append base=master data=sheet&j force;
run;
%end;
libname excellib clear;
%end;
%mend ReadXls;
%readxls (dir=c:\art)
data want (drop=_: f: varn:);
set master;
array varnm(6);
array vars(14);
retain varnm;
if compress(f1,,'kf') eq "Name" then do;
call missing(of varnm(*));
if compress(f5,,'kd') ne "" then varnm(1)=input(compress(f5,,'kd'),best12.);
if compress(f6,,'kd') ne "" then varnm(2)=input(compress(f6,,'kd'),best12.);
if compress(f7,,'kd') ne "" then varnm(3)=input(compress(f7,,'kd'),best12.);
if compress(f8,,'kd') ne "" then varnm(4)=input(compress(f8,,'kd'),best12.);
if compress(f9,,'kd') ne "" then varnm(5)=input(compress(f9,,'kd'),best12.);
if compress(f10,,'kd') ne "" then varnm(6)=input(compress(f10,,'kd'),best12.);
end;
else if compress(f1,,'kf') ne "" then do;
Name=compress(f1,,'kf');
FileYr=input(compress(f2,,'kd'),best12.);
Table=compress(f3,,'kf');
Year=input(compress(f4,,'kd'),best12.);
if compress(f5,,'kd') ne "" then vars(varnm(1))
=input(compress(f5,,'kd'),best12.);
if compress(f6,,'kd') ne "" then vars(varnm(2))
=input(compress(f6,,'kd'),best12.);
if compress(f7,,'kd') ne "" then vars(varnm(3))
=input(compress(f7,,'kd'),best12.);
if compress(f8,,'kd') ne "" then vars(varnm(4))
=input(compress(f8,,'kd'),best12.);
if compress(f9,,'kd') ne "" then vars(varnm(5))
=input(compress(f9,,'kd'),best12.);
if compress(f10,,'kd') ne "" then vars(varnm(6))
=input(compress(f10,,'kd'),best12.);
output;
end;
run;
HTH,
Art
Art,
Thank you for your kind answer. It works fine as long as reading 30 excel files into one dataset, however there seems to be too many variables to put each of them in each column. I appreciate your helpful comments. Thanks!
Your example had var1 thru var14. If you actually have more vars, you only have to increase the array vars from 14 to whatever the actual max is.
Of course, I might just not understand the file you are hoping to achieve. As I mentioned, you cannot have a SAS file looking like your example, unless the file is simply going to be non-analyzable text.
Art,
I didn't fully understand your code at the moment I replied in that your code automatically identifies the identical variable in columns and put body of contents in the right order. Your code brings me up some possibility to reduce tremendous amount of time if it can be applied to my data after some of modification. Looking forward to seeing this happens!
One remaining issue is that my data actually have character variables in the table headers unlikely V1-V14, and I can't manually count how many variables I have in total because it has too many tables.
Could you make any follow-up suggestion or comments on these issues?
Any of your comments will be greatly appreciated.
Thank you.
Minsoo
You could try a slightly different approach, namely building a long master file and then transposing it. See if the following produces what you want:
options noxwait;
%macro ReadXls (dir=);
%sysexec cd &dir;
%sysexec dir *.xls /b/o:n > flist.txt;
data _indexfile;
length filen $200;
infile "&dir.\flist.txt";
input filen $;
run;
proc sql noprint;
select count(filen) into :cntfile
from _indexfile;
%if &cntfile>=1 %then %do;
select filen into :filen1-:filen%left(&cntfile)
from _indexfile;
%end;
quit;
%do i=1 %to &cntfile;
libname excellib excel "&dir.\&&filen&i";
proc sql noprint;
create table sheetname as
select tranwrd(memname, "''", "'") as sheetname
from sashelp.vstabvw
where libname="EXCELLIB";
select count(DISTINCT sheetname) into :cnt_sht
from sheetname;
select DISTINCT sheetname into :sheet1 - :sheet%left(&cnt_sht)
from sheetname;
quit;
%do j=1 %to &cnt_sht;
proc import datafile="&dir.\&&filen&i"
out=sheet&j replace;
sheet="&&sheet&j";
getnames=no;
mixed=yes;
run;
data sheet&j. (keep=Name FileYr Table Year var data);
array vars(10) $32.;
array vars_in(10) $32. f1-f10;
retain vars;
set sheet1;
if compress(f1,,'kfd') eq "Name" then do;
call missing(of vars(*));
do i=1 to 10;
if compress(vars_in(i),,'kfd') ne "" then
vars(i)=input(compress(vars_in(i),,'kfd'),$32.);
end;
end;
else if compress(f1,,'kf') ne "" then do;
Name=compress(vars_in(1),,'kfd');
FileYr=input(compress(vars_in(2),,'kd'),best12.);
Table=compress(vars_in(3),,'kfd');
Year=input(compress(vars_in(4),,'kd'),best12.);
do i=5 to 10;
if compress(vars_in(i),,'kd') ne "" then do;
var=vars(i);
data=input(compress(vars_in(i),,'kd'),best12.);
output;
end;
end;
end;
run;
proc append base=master data=sheet&j force;
run;
%end;
proc sort data=master;
by Name FileYr Table Year;
run;
proc transpose data=master out=want (drop=_NAME_);
by Name FileYr Table Year;
var data;
id var;
run;
libname excellib clear;
%end;
%mend ReadXls;
%readxls (dir=c:\art)
Hi Art,
Great! It works just fine! Your help is tremendously valuable for me.
Thanks,
Minsoo
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.