Patrick: Your reply almost has me where I need to be. I have (I think) one last question. As a final step, I would like to sum up observations by group. Using your sample data (I added an additional row of data to your code below to make my question functional), I get the above image as the final output data set. However, what I really want as the final output is a condensed version of this that sums "var4" by group "var1" and by date. (One of the variables in my true data is a date; I'm just using source_file as a stand-in here.) So my desired output would look like this for "Nsdqsh202002":

    var1  source_file  sum_var4
1   a     20200201     2
2   x     20200201     1
3   a     20200202     4
4   x     20200202     2
5   a     20200203     6
6   x     20200203     3

Then do the same process for the next file, "Nsdqsh202003". I just cannot figure out the timing of the macros and the call execute commands. Thank you!
/** create sample data **/
%macro createSampleData(dir=%sysfunc(pathname(work)));
  /*
    Create sample input for the extraction macros.

    Parameters:
      dir - folder in which the zip files are written
            (defaults to the physical path of the WORK library).

    For each month 01-04 one archive &dir/NSDQsh2020MM.zip is written.
    Each archive receives 30 members NSDQsh2020MMDD.txt (DD = 01-30), and
    each member holds three comma-separated records whose fourth field is
    the (unpadded) day number.
  */

  /* keep loop counters and helpers out of the caller's/global scope */
  %local month day _month _day;

  %do month=1 %to 4;
    %let _month=%sysfunc(putn(&month,z2.));

    /* one ZIP-method fileref per monthly archive */
    filename out zip "&dir/NSDQsh2020&_month..zip";

    %do day=1 %to 30;
      %let _day=%sysfunc(putn(&day,z2.));

      /* each DATA step writes one member into the archive */
      data _null_;
        file out("NSDQsh2020&_month.&_day..txt");
        put
          "a,b,c,&day" /
          "x,y,z,&day" /
          "a,t,s,&day"
        ;
        stop;
      run;
    %end;

    filename out clear;
  %end;
%mend;
/** macro definitions **/
/*
list files in a directory. Code based on:
https://go.documentation.sas.com/?docsetId=mcrolref&docsetTarget=n0js70lrkxo6uvn1fl4a5aafnlgt.htm&docsetVersion=9.4&locale=en
*/
%macro dirlist(dir,ext,result=dir_list,_reset=1);
  /*
    Collect every file below &dir whose extension equals &ext
    (case-insensitive) into the table &result.

    Parameters:
      dir    - directory to scan.
      ext    - file extension to keep, without the dot (e.g. zip).
      result - one- or two-level name of the output table; a one-level
               name is created in WORK. Columns: dir, file_name.
      _reset - internal. 1 (default) drops &result before scanning.
               The recursive calls pass 0 so that rows collected so far
               are not wiped when a subdirectory is entered.

    Entries without any extension are treated as subdirectories and are
    scanned recursively into the same &result table.
  */
  %local filrf rc did name i _lib _mem;

  /* split &result into libref and member name once */
  %let _lib=%scan(work.&result,-2,.);
  %let _mem=%scan(&result,-1,.);

  /* start from an empty result table - top-level call only */
  %if &_reset eq 1 %then %do;
    proc datasets lib=&_lib nolist nowarn;
      delete &_mem;
    run;
    quit;
  %end;

  %let rc=%sysfunc(filename(filrf,&dir));
  %let did=%sysfunc(dopen(&filrf));

  %if &did eq 0 %then %do;
    %put Directory &dir cannot be open or does not exist;
    /* release the fileref before leaving */
    %let rc=%sysfunc(filename(filrf));
    %return;
  %end;

  %do i = 1 %to %sysfunc(dnum(&did));
    %let name=%qsysfunc(dread(&did,&i));

    %if %qupcase(%qscan(&name,-1,.)) = %upcase(&ext) %then %do;
      /* one-row scratch table, created in the same library as the result */
      data &_lib.._&_mem;
        length dir $200 file_name $100;
        dir="&dir";
        file_name="&name";
        output;
        stop;
      run;

      proc datasets lib=&_lib nolist nowarn;
        append base=&_mem data=_&_mem;
      run;
        delete _&_mem;
      run;
      quit;
    %end;
    %else %if %qscan(&name,2,.) = %then %do;
      /* no extension: assume a subdirectory; keep the same result table.
         '/' is used as separator like in the rest of this program. */
      %dirlist(&dir/&name,&ext,result=&result,_reset=0)
    %end;
  %end;

  %let rc=%sysfunc(dclose(&did));
  %let rc=%sysfunc(filename(filrf));
%mend dirlist;
/* list members in zip file */
%macro zipMemList(source=, outds=zip_mem_list);
  /*
    Write one row per member of the archive &source into &outds.

    Parameters:
      source - path of the zip file.
      outds  - output table (columns: zip = archive path,
               memname = member name).
  */

  /* point a ZIP-method fileref at the archive */
  filename inzip zip "&source";

  /* open the archive like a directory and emit each entry */
  data &outds(keep=zip memname);
    length zip $200 memname $200;
    zip="&source";
    did=dopen("inzip");
    /* a handle of 0 means the archive could not be opened: emit no rows */
    if did ne 0 then do;
      do idx=1 to dnum(did);
        memname=dread(did,idx);
        output;
      end;
      rc=dclose(did);
    end;
  run;

  filename inzip clear;
%mend;
/* read member in zip file into SAS dataset */
%macro ReadMemInZip(source=, member=, outds=);
  /*
    Read one comma-delimited text member of a zip archive and append its
    records to &outds.

    Parameters:
      source - path of the zip file.
      member - name of the member inside the archive.
      outds  - table the records are appended to.

    Columns produced: var1-var3 (character), var4 (numeric) and
    source_file (the member name).
  */

  /* ZIP-method fileref on the archive */
  filename inzip zip "&source";

  /* stage the member's records in a scratch table */
  data _tmp(compress=yes);
    infile inzip(&member) firstobs=1 dsd dlm=',';
    input var1 $ var2 $ var3 $ var4;
    length source_file $ 150;
    source_file="&member";
  run;

  /* add the staged records to the target table */
  proc append data=_tmp base=&outds(compress=yes);
  run;quit;

  /* drop the scratch table and release the fileref */
  proc delete data=_tmp;
  run;quit;

  filename inzip clear;
%mend;
/* extract per zip file all the data */
%macro extract(zipfile,outds=want);
  /*
    Read every member of one zip file into &outds.

    Parameters:
      zipfile - path of the zip file.
      outds   - table that receives the records of all members
                (default: want).

    Side effect: the member list is left in WORK._zip_mem_list.
  */

  /* list the members of the zip file; a plain macro call is enough here,
     no DATA step / CALL EXECUTE indirection is needed */
  %zipMemList(source=&zipfile, outds=_zip_mem_list)

  /* read all the members in zip file into SAS dataset.
     %nrstr() keeps the macro from executing while this DATA step is still
     running: the call is pushed onto the input stack as text and
     ReadMemInZip runs, one member at a time, after the step has ended. */
  data _null_;
    set _zip_mem_list;
    length _cmd $1000;
    _cmd=cats('%nrstr(%ReadMemInZip(source=',zip,',member=',memname,",outds=&outds))");
    call execute(_cmd);
  run;
%mend;
/** execution **/
/* settings: folder holding the zip files, libref for the result tables,
   and the inclusive yyyymm range used to select zip files */
%let source_dir=%sysfunc(pathname(work));
%let target_lib=nasdaq;
%let start_yyyymm=202002;
%let end_yyyymm=202003;

/* range bounds converted to SAS date values for the WHERE filter below */
%let start_dt=%sysfunc(inputn(&start_yyyymm,yymmn6.));
%let end_dt=%sysfunc(inputn(&end_yyyymm,yymmn6.));

/* the target library points at the WORK folder in this example */
libname &target_lib "%sysfunc(pathname(work))";

/* create sample zip files under the source folder */
%createSampleData(dir=&source_dir);

/* table _dir_list: one row per zip file found in the source folder */
%dirlist(&source_dir,zip,result=_dir_list);

/* extract data: one result table per selected zip file, named after it */
data _null_;
  set _dir_list(
    where=(input(scan(scan(file_name,1,'.'),-1,,'kd'),yymmn6.) between &start_dt and &end_dt)
  );
  length _zip $301 _call $1000 _outds $41;

  /* <target_lib>.<zip file name without extension> */
  _outds=catx('.',"&target_lib",scan(file_name,1,'.'));
  put _outds=;

  /* full path of the zip file, then the macro call that processes it */
  _zip=cats(dir,'/',file_name);
  _call=cats('%extract(zipfile=',_zip,',outds=',_outds,');');
  call execute(_call);
run;
... View more