I finally got what I need using bits and pieces from literally everyone's suggestions. And I learned a lot in the process. So thank you all very much!
/** create sample data **/
%macro createSampleData(dir=%sysfunc(pathname(work)));
/*
  Creates sample input for the rest of the program.

  For each month 01-04 a ZIP fileref "&dir/NSDQsh2020<mm>.zip" is assigned, and
  for each day 01-30 a member "NSDQsh2020<mm><dd>.txt" holding three
  comma-separated records is written through it.

  Parameters:
    dir  Directory that receives the ZIP files (default: the WORK path).

  Note: every month gets 30 members, so some member names are not valid
  calendar dates (for example 20200230). That is fine for sample data.
*/
  /* Keep loop counters and scratch variables out of the caller's scope. */
  %local month day _month _day;

  %do month=1 %to 4;
    %let _month=%sysfunc(putn(&month,z2.));
    filename out zip "&dir/NSDQsh2020&_month..zip";

    %do day=1 %to 30;
      %let _day=%sysfunc(putn(&day,z2.));
      data _null_;
        file out("NSDQsh2020&_month.&_day..txt");
        put
          "a,b,c,&day, 2020&_month.&_day" /
          "x,y,z,&day, 2020&_month.&_day" /
          "a,t,s,&day, 2020&_month.&_day"
        ;
        stop;
      run;
    %end;

    filename out clear;
  %end;
%mend createSampleData;
%macro dirlist(dir,ext,result=dir_list,_nested=0);
/*
  Builds a dataset with one row (dir, file_name) for every file under &dir
  whose last dot-delimited name part equals &ext (case-insensitive).
  Entries whose name contains no dot are treated as subdirectories and are
  scanned recursively.

  Parameters:
    dir      Directory to scan.
    ext      Extension to match, without the dot (e.g. zip).
    result   Output dataset, one- or two-level name (default: dir_list).
    _nested  Internal flag set to 1 by the recursive call; callers should
             leave it at its default of 0.
*/
  %local filrf rc did name i lib mem;

  /* Split &result into libref and member; a one-level name maps to WORK. */
  %let lib=%scan(work.&result,-2,.);
  %let mem=%scan(&result,-1,.);

  /* Reset the result only on the outermost call, so that rows gathered
     before descending into a subdirectory are not wiped out. */
  %if &_nested eq 0 %then %do;
    proc datasets lib=&lib nolist nowarn;
      delete &mem;
    run;
    quit;
  %end;

  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));

  %if &did eq 0 %then %do;
    %put Directory &dir cannot be open or does not exist;
    /* Release the fileref before leaving so it does not leak. */
    %let rc=%sysfunc(filename(filrf));
    %return;
  %end;

  %do i = 1 %to %sysfunc(dnum(&did));
    %let name=%qsysfunc(dread(&did,&i));

    %if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then %do;
      /* One-row scratch dataset, kept in the same library as the result so
         the APPEND below finds it for two-level result names as well. */
      data &lib.._&mem;
        length dir $200 file_name $100;
        dir="&dir";
        file_name="&name";
        output;
        stop;
      run;

      proc datasets lib=&lib nolist nowarn;
        append base=&mem data=_&mem;
      run;
        delete _&mem;
      run;
      quit;
    %end;
    %else %if %qscan(&name,2,.) = %then %do;
      /* No dot in the name: assume a subdirectory. Pass &result on so the
         hits land in the same dataset, and use "/" as the separator, as the
         rest of this program does. */
      %dirlist(&dir/&name,&ext,result=&result,_nested=1)
    %end;
  %end;

  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend dirlist;
%macro zipMemList(source=, outds=zip_mem_list);
/*
  Lists the members of one ZIP file and appends them to &outds.

  Parameters:
    source  Full path of the ZIP file.
    outds   Dataset that accumulates the listing (default: zip_mem_list).
            Columns: zip (the ZIP path as passed in) and memname (the name
            of a member inside that ZIP).

  Folder entries (member names ending in "/") are skipped, so &outds only
  holds members that can be read as files.
*/
  /* Assign a fileref with the ZIP access method */
  filename inzip zip "&source";

  /* Read the member names through the directory functions */
  data _tmp(keep=zip memname);
    length zip $200 memname $200;
    zip="&source";
    fid=dopen("inzip");
    if fid=0 then do;
      /* Report the failure instead of silently producing no rows. */
      putlog "WARNING: could not open ZIP file &source";
      stop;
    end;
    memcount=dnum(fid);
    do i=1 to memcount;
      memname=dread(fid,i);
      /* A trailing slash marks a folder entry, not a readable file. */
      if substr(memname,length(memname),1) ne '/' then output;
    end;
    rc=dclose(fid);
  run;

  filename inzip clear;

  /* Accumulate the listing across all ZIP files */
  proc append base=&outds data=_tmp;
  run;quit;

  proc delete data=_tmp;
  run;quit;
%mend zipMemList;
%macro processData(source= , member=, outds= );
/*
  Reads one comma-delimited text member straight from a ZIP file, sums var4
  by (var1, var5) and appends the summary rows to &outds.

  Parameters:
    source  Full path of the ZIP file.
    member  Name of the text member inside the ZIP.
    outds   Dataset that accumulates the summaries across all members.

  Uses the scratch datasets _tmp and _tmp2 and the fileref inzip; all three
  are removed again before the macro ends.
*/
  filename inzip zip "&source";

  /* Import the text member directly from the ZIP. The member name is
     quoted so names with blanks or special characters are read as one
     token. */
  data _tmp(compress=yes);
    infile inzip("&member")
      firstobs=1 dsd dlm=',';
    input
      (var1-var3) ($) var4 var5;
  run;

  /* Release the fileref once the member has been read. */
  filename inzip clear;

  /* Summarize the data */
  proc sql;
    create table _tmp2 as
    select var1, var5, sum(var4) as sum_4
    from _tmp
    group by var1, var5;
  quit;

  /* Append the summary, then drop both scratch datasets */
  proc append base=&outds data=_tmp2;
  run;quit;

  proc delete data=_tmp _tmp2;
  run;quit;
%mend processData;
/** execution **/
/* define path where zip files reside */
%let source_dir=%sysfunc(pathname(work));
/* define target lib for result tables */
%let target_lib=nasdaq;
libname &target_lib "%sysfunc(pathname(work))";
/* create sample zip files under this path */
%createSampleData(dir=&source_dir);
/* create dataset that contains directory path names and zip file names */
%dirlist(&source_dir,zip,result=_dir_list);
/* The two datasets below are only ever appended to, so drop any copies left
   from an earlier run in this session; otherwise rows would be duplicated. */
proc datasets lib=work nolist nowarn;
delete _zip_mem_list summarized_data;
run;
quit;
/* create dataset that contains zip file pathnames and the member names
   inside each zip: one %zipMemList call is queued per row of _dir_list */
data _null_;
set _dir_list;
length _name $1000;
_name=cats(dir,'/',file_name);
call execute('%zipMemList(source=' ||strip(_name)|| ', outds=_zip_mem_list);');
run;
/* Import each txt file and summarize the data. Append to create a master
   dataset of summarized data across all txt files in all ZIPs: one
   %processData call is queued per row of _zip_mem_list */
data _null_;
set _zip_mem_list;
call execute('%processData(source=' ||strip(zip)|| ' ,member=' ||strip(memname)|| ',outds=summarized_data);');
run;
... View more