We've written a macro that reads in multiple zipped folders and for each folder, reads in multiple XML files and parses the XML files into records in a dataset. Here is how things look: /folders/myfolders/In |- data_zip1.zip |- sub_folder |- xmlfile1.xml |- xmlfile2.xml ... |- xmlfile500.xml |- data_zip2.zip |- sub_folder |- xmlfile1.xml |- xmlfile2.xml ... |- xmlfile500.xml ...and so on for n number of zipped folders. Problem 1: It seems like every time we run the macro, it appends the output dataset instead of writing over it. Problem 2: We also need to delete the temporary work directories and datasets after each proc append (the xml_test_&j. datasets towards the bottom of the code) so they don't clog up SAS EG and throw a hissy fit for having more than 50 datasets. See link below for our first post when we were trying to figure out how to parse out an XML file from a zipped folder. https://communities.sas.com/t5/General-SAS-Programming/Import-zipped-XML-files/m-p/322732#M42476 options spool nonotes nosource nosource2 errors=0;
filename zips "/folders/myfolders/In/";
*get names of zip files contained within the directory;
data zips_contents;
fid=dopen("zips");
if fid=0 then stop;
memcount = dnum(fid);
do i = 1 to memcount;
memname = dread(fid,i);
output;
end;
run;
*assign macro variables for the zip files;
** zipNOBS: number of zip files in the folder;
**&&zipname&i: name of the zip file;
proc sql noprint;
select count(*)
into :zipNObs
from zips_contents;
select memname
into :zipname1-:zipname%left(&zipNObs)
from zips_contents;
quit;
*check macro variables - outputs value to log;
%put count of zip files is &zipNObs;
%put name of zip files is &zipname1;
*macro cycles through each zip folder in the directory location;
*for each zip folder, cycle through each of the xml files, parses the xml, and append to a SAS dataset;
%macro doit;
%do i=1 %to &zipNObs;
filename inzip ZIP "/folders/myfolders/In/&&zipname&i..";
*read the name of each xml file in each zipped folder;
data contents_&i.;
length memname $200 ;
fid=dopen("inzip");
if fid=0 then
stop;
memcount=dnum(fid);
do i=1 to memcount;
/*Scans and gives only the name of the xml file*/
memname=scan(dread(fid,i), 2, '\');
output;
end;
rc=dclose(fid);
run;
*populates dataset CONTENTS that contains a list of each of the xml files in the zip file;
PROC APPEND BASE=CONTENTS DATA=CONTENTS_&i. FORCE;
RUN;
*assign macro variables for the xml files;
** xmlNOBS: number of xml files in the folder;
**&&xmlname&i: name of the xml file;
proc sql noprint;
select count(*)
into :xmlNObs
from contents_&i.;
select memname
into :xmlname1-:xmlname%left(&xmlNObs)
from contents_&i.;
quit;
*check macro variables for the xml files - outputs value to log;
%put count of xml files is &xmlNObs;
%put name of xml files is &xmlname1;
%do j=1 %to &xmlNObs;
/* identify a temp folder in the WORK directory */ /* have SAS take each xml file and put it in a temporary work directory*/
filename xl "%sysfunc(getoption(work))/&&xmlname&j" ;
/* hat tip: "data _null_" on SAS-L */
data _null_;
/* using member syntax here */
infile inzip(sub_folder\&&xmlname&j)
lrecl=256
recfm=F
length=length
eof=eof unbuf;
file xl lrecl=256 recfm=N;
input;
put _infile_ $varying256. length;
return;
eof:
stop;
run;
*use the my_map2.map as a guide for sas to parse the xml file; *for each xml file in its temporary work directory, parse and append;
filename SXLEMAP '/folders/myfolders/XML_Map/my_map2.map'; libname my_xml XMLV2 "%sysfunc(getoption(work))/&&xmlname&j.." xmlmap=sxlemap access=readonly;
data xml_test_&j.;
set my_xml.xml_contents;
run;
*xml_test contains the parsed xml file. One record per xml file;
PROC APPEND BASE=xml_test DATA=xml_test_&j. FORCE;
RUN;
%end;
%end;
%mend;
%doit
*check and output transactions as permanent dataset;
proc print data= xml_test; run;
libname output "/folders/myfolders/Out";
DATA output.parsed_XML_Data;
set xml_test;
run;
... View more