BookmarkSubscribeRSS Feed
MILKYLOVE
Calcite | Level 5

Hello all, 

 

I have a large csv file blc_20220602_20220601220525.csv that I split into different files of 350,000 lines. However, after splitting the files,

I am left with these names:

MILKYLOVE_0-1664368563542.png

 

Instead of 'aa', 'ab', 'ac', How can I name my split files into

blc_20220602_20220601220525_1.csv

blc_20220602_20220601220525_2.csv

blc_20220602_20220601220525_3.csv

etc...

 

%let blc = c:/myfiles/blc;


/* ************* Construction of macro "SPLIT" ************* */
%macro split_unix(fichier_complet=,
                          taille_split=, 
                          out=);
%let fichier = %scan(&fichier_complet,-1,/);
%let path = %substr(&fichier_complet,1,%index(&fichier_complet,&fichier)-1);
%let fic = %scan(&fichier,1,.);
%let extension = %scan(&fichier,2,.);

%global cr_split;
%let cr_split=0;

%put path = &path;
%put fichier = &fichier;
%put fic = &fic;
%put extension = &extension;
%put &fic..&extension;

/* *********** Verify if the file is empty *********** */
%let list_fic_vide =;
%let flag_vide =;
filename rep_cn pipe "wc -l &path./&fichier";

/* ************ Verify if the file exceeds the limit ************ */
%let count_row = 0;

data rep_cn;
    infile rep_cn dsd truncover;
    input var:$250.;
    if not find(lowcase(var),"total");
    count = input(scan(var,1," "),20.);
    fic = scan(var,-1,"/");
run;

proc sql noprint;
    select "KO", fic into :flag_vide, :list_fic_vide separated by " - "
    from rep_cn where count <= 1;
     
     select max(count) into: count_row
     from rep_cn group by fic;
quit;

%put List of empty files : &list_fic_vide.;
%put Max volume is : &count_row. nombre de lignes;
/* **************************** */

%if &flag_vide ne KO and &count_row. > %scan(&taille_split,2) %then %do;
     x "cd &path";
     systask command "split &taille_split &fic..&extension &fic" wait status=fg_split;

     %put CR Unix du split est : &fg_split;

     %if not &fg_split > 0 %then %do;
          filename list_fic pipe "find . ! -name '*.&extension' -a -name '*&fic*'";

          data temp;
                infile list_fic missover truncover dsd;
                input var:$50.;
                var = scan(var,2,"/");
          run;

          data _null_;
                set temp end=fin;
                call symputx(compress('split'||put(_n_,3.)),var,'g');
                if fin then call symputx('nbre',put(_n_,3.));
          run;

          %do i = 1 %to &nbre;
                %put split&i = &&split&i;
                x "mv &&split&i ./&out/&&split&i...&extension";
                %let cr_split=%eval(&cr_split+&sysrc);
          %end;
     %end;
%end; /* Fin %if &flag_vide ne KO */
%else %do;
     %let cr_split=1;
%end;
%mend;
/* ************************************************************** */

%macro boucle_lancement(chemin=,fic_a_split=);
     filename fic pipe "ls &chemin/&fic_a_split.";

     data _null_;
          infile fic missover truncover end=fin;
          input fic:$100.;
          call symputx(compress('fic'||put(_n_,3.)),fic,'g');
          if fin then call symputx('nbre_fic',put(_n_,3.),'g');
     run;

     %do nb = 1 %to &nbre_fic;
          %split_unix(fichier_complet=&&fic&nb,
                          taille_split=-l 350000, 
                          out=);
                          
          %if &cr_split eq 0 %then %do;
                x "rm &&fic&nb";
          %end;
     %end;
     
%mend;

%boucle_lancement(chemin=&blc.,
                          fic_a_split=blc_*.csv);

 

 

4 REPLIES 4
Tom
Super User Tom
Super User

Why are they getting named that way to begin with?  Is it because that is how the Unix command split names the files?  Is there an option to change how it names the files so it numbers them instead appending letters?

 

If not why not just split the files yourself instead of calling that Unix command.

 

%let infile=big.txt ;
%let outbase=small ;
data _null_;
   fileno+1;
   infile "&infile" ;
   outfile=cats("&outbase._",fileno,'.txt');
   do row=1 to 350000;
      input ;
      file out filevar=outfile ;
      put _infile_;
   end;
run;
MILKYLOVE
Calcite | Level 5

Yes, I think the systask command split just names the files with alphabets by default. 

That's why I'm trying to find a way to change the name or maybe use another method.

Tom
Super User Tom
Super User

I already showed you how to generate the split files with numeric suffixes using a different method.

 

If you want to rename the existing files then first get the list of files into data.  Say you have the list in a dataset named FILES and the filename is in a variable named FILENAME.  Then you could use code like this to generate NEW_NAME variable.

proc sql;
create table rename_files
  select substr(filename,1,length(filename)-6) as basename
       , count(*) as n_files
       , filename
  group by 1
   having count(*) > 1 
;
quit;

data rename_files;
   do suffix=1 by 1 until(last.basename);
      set rename_files;
      by basename;
      new_name = cats(basename,putn(suffix,cats(Z,length(cats(n_files)),'.'),'.csv');
      output;
   end;
run;

You could then use that list to generate code to move, rename or copy the files.

Ksharp
Super User
/*Assuming you are under Windows OS.
If your OS is Linux/Unix ,try to use LS command to repalce DIR command
*/

%let path_in=  c:\temp\ ;  


filename x pipe %sysfunc(quote(dir "&path_in."\*.csv /s /b));
data x;
 infile x truncover;
 input old $2000.;
if prxmatch('/^_\d+/',scan(old,-1,'/\')) then do;
n+1;
new=prxchange('s/[\.a-z]+$//i',1,strip(old));
new=cats("&path_in.\blc",scan(new,-1,'/\'),'_',n,'.csv');
rc=rename(strip(old),strip(new),'file');
end;
run; 

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 4 replies
  • 1129 views
  • 0 likes
  • 3 in conversation