Yes, you have the syntax right. Each new locally "remote" SAS session winds up running on a different CPU core on your computer. What I was working on probably would have taken a few days to run on a single core; using all 20 of my cores, it took ~5 hours, which isn't 1/20th of the time, but still a lot faster. It can now run overnight as opposed to all weekend. You have to split your dataset up into chunks so that each core can run on a different part of the dataset. Of course, you can't use code that needs to see the full dataset at once (e.g. no RETAIN, LAG(), etc., since those would break at the chunk boundaries).
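If it helps, here is the bare-bones SAS/CONNECT pattern the full program below is built around (task1, MyLib, chunk1, myMacVar, and someVar are just placeholder names for this sketch, not the names from my actual program):

options sasCmd = "sas"; * Tell SIGNON how to launch each local "remote" session. *;
signOn task1; * Spawn a new SAS session, which the OS puts on another core. *;
%sysLPut myMacVar = 123 / remote = task1; * Push a macro variable into that session. *;
rSubmit task1 wait = no inheritLib = (MyLib); * wait = no hands control straight back to this session. *;
proc sort data = MyLib.chunk1; by someVar; run; * Whatever work that session should do on its chunk. *;
endRSubmit;
waitFor _all_ task1; * Block until every spawned task has finished. *;
signOff task1;

The inheritLib = option is what lets the spawned session see your library without defining its own libName statement.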
Here's my code. I have 20 cores and 23 chunks per core. That way, in case my computer crashes, I will have at least some of it complete and saved permanently already. (The comments are for me; you probably already know the stuff they say.)
* Split the text data up into separate datasets so that multiple SAS sessions can be employed to increase CPU usage. Otherwise, SAS only uses one core out of 20, and this would take a long time to run. *;
* While we can only use 20 cores at once, we can still break the data up into smaller chunks and run them sequentially on each core, with the 20 sequential series of chunks running in parallel. *;
* This way, if something happens to the process, we will already have some results saved and will not need to start over from the beginning. *;
* Determine the number of rows in the input data. This only works if the data are saved as a SAS dataset. It does not work on the zipped view. *;
data _null_;
* This set statement never executes, so no records are read, but the header information about the set dataset is read in. *;
if 0
then set _3_NonNullWithLetters nObs = n;
call symputx('nRows',n);
stop; * Without STOP the data step would loop forever, because the SET statement never executes and end-of-file is never reached. *;
run; * nRows = 45980013 *;
%put nRows = &nRows;
%let nObsPerChunk = 100000;
%let nChunks = %sysFunc(ceil(&nRows/&nObsPerChunk));
%let nCores = 20;
%let nChunksPerCore = %sysFunc(ceil(&nChunks/&nCores));
%put nRows = &nRows nObsPerChunk = *&nObsPerChunk* nChunks = &nChunks nCores = *&nCores* nChunksPerCore = &nChunksPerCore;
* Create the chunks! Split the data into separate datasets so that each core can process different data simultaneously. *;
%macro chunks;
data %do chunkA = 1 %to &nChunksPerCore;
%do coreA = 1 %to &nCores;
CoreChnk.Core_&coreA._Chunk_&chunkA (compress = binary)
%end;
%end;;
set Michael._3_NonNullWithLetters;
* We want to fill the first chunk of each core first, then the second, and so on, so that every core ends up with as close to the same number of chunks as possible, *;
* as opposed to the first cores getting their full allotment of chunks and the last core getting maybe as few as one. *;
select;
%do chunkB = 1 %to &nChunksPerCore;
%do coreB = 1 %to &nCores;
%let chunkIndex = %sysEvalF(&nCores*(&chunkB - 1) + &coreB);
when (%sysEvalF(&nObsPerChunk*(&chunkIndex - 1)) < _n_ <= %sysEvalF(&nObsPerChunk*&chunkIndex)) output CoreChnk.Core_&coreB._Chunk_&chunkB;
%end;
%end;
end;
run;
* Fix capitalization in dataset names and set to read-only. *;
options noXWait noXSync;
%local xString;
* Clear the read-only attribute on the chunk files. *;
%do coreC = 1 %to &nCores;
%let xString = cd "%sysFunc(pathName(CoreChnk))\";
%do chunkC = 1 %to &nChunksPerCore;
%let xString = %sysFunc(compBl(&xString & attrib -R "Core_&coreC._Chunk_&chunkC..sas7bdat"));
%end;
%sysExec &xString;
%end;
%sleep(20);
* Rename each file to itself to fix the capitalization of the file names (Windows matches file names case-insensitively). *;
%do coreC = 1 %to &nCores;
%let xString = cd "%sysFunc(pathName(CoreChnk))\";
%do chunkC = 1 %to &nChunksPerCore;
%let xString = %sysFunc(compBl(&xString & rename "Core_&coreC._Chunk_&chunkC..sas7bdat" "Core_&coreC._Chunk_&chunkC..sas7bdat"));
%end;
%sysExec &xString;
%end;
%sleep(10);
* Set the read-only attribute on the chunk files. *;
%do coreC = 1 %to &nCores;
%let xString = cd "%sysFunc(pathName(CoreChnk))\";
%do chunkC = 1 %to &nChunksPerCore;
%let xString = %sysFunc(compBl(&xString & attrib +R "Core_&coreC._Chunk_&chunkC..sas7bdat"));
%end;
%sysExec &xString;
%end;
options noXWait xSync;
%mEnd chunks;
options mPrint;
%chunks;
options noMPrint;
* Macro to run all the cores and chunks. *;
%macro coreChunk;
options sasCmd = "sas";
* Current datetime. *;
%let ___startDT = %sysFunc(datetime());
* Delete the results from previous runs. *;
proc datasets library = CoreChnk noList;
delete %do core = 1 %to &nCores;
%do chunk = 1 %to &nChunksPerCore;
Core_&core._Chunk_&chunk._ForReview
Core_&core._Chunk_&chunk._NoReview
Core_&core._Chunk_&chunk._ForReviewDetail
%end;
%end;
;
quit;
%do core = 1 %to &nCores;
signOn core&core;
* Pass macro variables to the remote session. *;
%sysLPut core = &core / remote = core&core;
%sysLPut nChunksPerCore = &nChunksPerCore / remote = core&core;
* Pass the libName to the remote session, and submit code to the remote session. *;
rSubmit core&core wait = no inheritLib = (CoreChnk);
* Redirect the log to an external file *;
proc printTo log = "K:\DAplay\Michael\ECS\MGUS Text Search\Core Chunk/Core_&core..log" new;
run;
options fullSTimer;
* Must define the macros within the remote session. *;
%include 'K:\Michael\Search all text fields everywhere - Match Macro.sas' / lRecL = 4096;
%include 'K:\Michael\Search all text fields everywhere - Chunk Macro.sas' / lRecL = 4096;
%include 'K:\Support Macros\squeeze_1.sas' / lRecL = 4096;
%chunk;
proc printTo log = log;
run;
/* %sysRPut ___startDT_&core = &___startDT;*/
endRSubmit;
%end;
waitfor _all_;
signoff _all_;
* Print total duration. *;
data _null_;
dur = datetime() - &___startDT;
put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
run;
%mEnd coreChunk;
%coreChunk;