BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
Phil_NZ
Barite | Level 11

Hi all SAS Users!

It is quite a mess for me to debug the current situation. I really need your helps with this case

 

I have two codes and they all work well individually so far. However, when I am prudent by running and comparing both of them, the results generated from them are a little bit different. 

Even these codes are about dealing with all files in one folder, but to simplify and testable, I just use one file in the folder.

Code 1:

options compress=yes reuse=yes;
options mergenoby=error;
OPTIONS MPRINT;

%macro ImportAndTranspose(
      File=
	  , cur=
	  , outf=
      , StartSheet=
      , EndSheet=
   );
   %local i;
   
   %do i = &StartSheet. %to &EndSheet.;

    %if &i=1 %then %do;
	
    proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet1$A:X";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;
	%end;

	  %else %if &i=34 %then %do;
      proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet&i.$A:AG";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;

      proc transpose data= &outf.&i. 
            out= &outf.&i._out(rename=(COL1=s&i. _NAME_=Year) drop=_label_);
         by Type;
         VAR '1988'N - '2019'N;
      run;
	  proc print data=&outf.&i._out;
	  run;

	  data &outf.&i._outx;
	  set &outf.&i._out;
      if s&i. in: ('NA', '$$', '') then s&i. = " ";
	  run;

	  %end;
    
   
    %else %if (&i ne 1) and (&i ne 34) %then %do;
      proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet&i.$A:AG";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;

      proc transpose data= &outf.&i. 
            out= &outf.&i._out(rename=(COL1=s&i. _NAME_=Year) drop=_label_);
         by Type;
         VAR '1988'N - '2019'N;
      run;
	  proc print data=&outf.&i._out;
	  run;

      data &outf.&i._outx;
      set &outf.&i._out;
	  if input(s&i., ?? 32.) = . then s&i.2=.;
      else s&i.2=input(s&i., 32.);
	  /*if s&i. in: ('NA', '$$', '..') then s&i.2=.;
	  else if s&i. not in: ('NA', '$$', '..') then s&i.2=input(s&i., 32.);*/
	  drop s&i.;
	  rename s&i.2=s&i.;
      run;
	  %end;
	%end;
 /* Merging*/
%let outfm = %substr(&outf,1,%length(&outf)-5);
  data &outfm.merge1;
     merge
	  %do i =&StartSheet.+1 %to &EndSheet.;
	    &outf.&i._outx(keep= type year s&i.)
	  %end;
	;
	by type year;
  run;


 data &outfm.merge2;
     merge
	  &outf.&StartSheet.
	  &outfm.merge1 
	 ;
	 by type;
 run;

   
   
%mend;

/*Replicate all files in one folder*/

filename mydir 'C:\Users\pnguyen\Desktop\New folder';
data _null_;
did = dopen('mydir');
do i = 1 to dnum(did);
  fname = scan(dread(did,i),1,'.');
  /*fname: AustriaEUR*/
  length short_fn $29 currency $3 ;
  short_fn= cats(substr(fname, 1,length(fname)-3),'_');
  currency=substr(fname,length(fname)-2);
  cmd=cats('%ImportAndTranspose(File=C:\Users\pnguyen\Desktop\New folder\',
      strip(fname),
      ',cur=',currency,
      ',outf=',short_fn,'sheet,startsheet=1,endsheet=45);');
	  /*strip(fname) to link 
	  File=C:\Users\pnguyen\Desktop\New folder\AustriaEUR*/
  call execute(cmd);
end;
keep fname;
run;

To this code, the datafile named is AustriaEUR, and I have the output of interest is AUSTRIA_MERGE2

 

Code 2:


options compress=yes reuse=yes;
options mergenoby=error;
OPTIONS MPRINT;

%macro ImportAndTranspose(
      File=
	  , outf=
      , StartSheet=
      , EndSheet=
   );
   %local i;
   
   %do i = &StartSheet. %to &EndSheet.;

    %if &i=1 %then %do;
	
    proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet1$A:X";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;
	%end;

	  %else %if &i=34 %then %do;
      proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet&i.$A:AG";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;

      proc transpose data= &outf.&i. 
            out= &outf.&i._out(rename=(COL1=s&i. _NAME_=Year) drop=_label_);
         by Type;
         VAR '1988'N - '2019'N;
      run;
	  proc print data=&outf.&i._out;
	  run;

	  data &outf.&i._outx;
	  set &outf.&i._out;
      if s&i. in: ('NA', '$$', '') then s&i. = " ";/********/
	  run;

	  %end;
    
   
    %else %if (&i ne 1) and (&i ne 34) %then %do;
      proc import datafile= "&File." 
                  out= &outf.&i.
                  dbms= xlsx 
                  replace;
          range= "Sheet&i.$A:AG";
          getnames= yes;
      run;
      
      proc sort data= &outf.&i.;
      by Type;
      run;

      proc transpose data= &outf.&i. 
            out= &outf.&i._out(rename=(COL1=s&i. _NAME_=Year) drop=_label_);
         by Type;
         VAR '1988'N - '2019'N;
      run;
	  proc print data=&outf.&i._out;
	  run;

      data &outf.&i._outx;
      set &outf.&i._out;
	  if input(s&i., ?? 32.) = . then s&i.2=.;
      else s&i.2=input(s&i., 32.);
	  /*if s&i. in: ('NA', '$$', '..') then s&i.2=.;
	  else if s&i. not in: ('NA', '$$', '..') then s&i.2=input(s&i., 32.);*/
	  drop s&i.;
	  rename s&i.2=s&i.;
      run;
	  %end;
	%end;
 /* Merging*/
%let outfm = %substr(&outf,1,4);
  data &outfm.merge1;
     merge
	  %do i =&StartSheet.+1 %to &EndSheet.;
	    &outf.&i._outx(keep= type year s&i.)
	  %end;
	;
	by type year;
  run;


 data &outfm.merge2;
     merge
	  &outf.&StartSheet.
	  &outfm.merge1 
	 ;
	 by type;
 run;

   
   
%mend;

/*Replicate all files in one folder*/

filename mydir 'C:\Users\pnguyen\Desktop\New folder';
data _null_;
did = dopen('mydir');
do i = 1 to dnum(did);
  fname = dread(did,i);
  short_fn= cats(substr(fname, 1,3),'_');
  cmd=cats('%ImportAndTranspose(File=C:\Users\pnguyen\Desktop\New folder\',
      strip(fname),',outf=',short_fn,'sheet,startsheet=1,endsheet=45);');

  call execute(cmd);
end;
keep fname;
run;

The filename now is Austria_ and the output of interest is Aus_merge2

 

The file Austria_ is also the file AustriaEUR, I just change the name to fit the according to code. However, the output of interest Aus_merge2 is slightly different from AUSTRIA_MERGE2 (different about 300 over 22,000 observations). In particular, there are more observations in the output of code 2 compared to that of code 1. 

 

I try to print the code out and compare it but still have not yet found out what causes this difference. 

 

I really and truly hope that you can help me to sort it out.

 

Many thanks and warmest regards.

 

 
Thank you for your help, have a fabulous and productive day! I am a novice today, but someday when I accumulate enough knowledge, I can help others in my capacity.
1 ACCEPTED SOLUTION

Accepted Solutions
ballardw
Super User

Usually when something changes a behavior like running the second time and throwing such a warning or error the first is that the code that runs second finds something left over from the previous run that was not there the first time.

 

My guess with so many data sets is one or more data sets that is referenced before actual creation the first time but then creates one. Another frequent possible leftover is one or more macro variables.

 

Did you run the first try with Options mprint? and keep the log to compare with the second?

View solution in original post

5 REPLIES 5
ChrisNZ
Tourmaline | Level 20

That's a lot of code. And we don't even have data or the log.

You need to narrow the problem down.

Kurt_Bremser
Super User

Maxim 2: Read the Log.

 

Compare the two logs step by step, so you can find out at which step the observation numbers start to differ.

Add %PUT statements in the macro(s), so you know exactly at which iteration of the %DO loop you are, and can reconstruct with which values the DATA/PROC steps run.

ballardw
Super User

Lets see if I understand some of this.

You created new code with the same macro name?  (Hint: bad form right there for debugging anything. If you have an error compiling your new macro code the old code might actually be what is running).

 

The new code created different output that the first? Then look for where the code changed. Better: document where/when/why changes were made).

 

The code uses the same naming convention for intermediate data sets?  Again this can have a problem with data sets remaining from a previous run that get used where there is an error in one of the intermediate steps that is supposed to create the "new" version data set.

 

I would be strongly tempted to modify this bit of your code:

data _null_;
did = dopen('mydir');
do i = 1 to dnum(did);
  fname = scan(dread(did,i),1,'.');
  /*fname: AustriaEUR*/

to create separate output data sets with the read filenames

data run_1 (keep=fname);
did = dopen('mydir');
do i = 1 to dnum(did);
  fname = scan(dread(did,i),1,'.');
  output;
  /*fname: AustriaEUR*/

obviously change the name of the data set when using the "substitue" codes.

See if they read the same files, at least if the values of Fname are the same.

 

But really, compare the code line by line and test it OUTSIDE of a macro with with the same inputs.

Phil_NZ
Barite | Level 11

Hi @ballardw  @Kurt_Bremser  and @ChrisNZ 

 

I just check and rerun 3 more times to see the reason after checking the code carefully. The result turns out is that these two codes produce the same results.

 

However, at the first time the results of running these two codes are different because for the first code, SAS EG popped up a notice "Enterprise Guide was unable to submit this job successfully" as the picture below.

My97_0-1611782400708.png

I am wondering if you can tell me why it happened and how to sort it out. (I mean, now we only focus on the code 1, not code 2).

 

Many thanks and warmest regards.

 
Thank you for your help, have a fabulous and productive day! I am a novice today, but someday when I accumulate enough knowledge, I can help others in my capacity.
ballardw
Super User

Usually when something changes a behavior like running the second time and throwing such a warning or error the first is that the code that runs second finds something left over from the previous run that was not there the first time.

 

My guess with so many data sets is one or more data sets that is referenced before actual creation the first time but then creates one. Another frequent possible leftover is one or more macro variables.

 

Did you run the first try with Options mprint? and keep the log to compare with the second?

sas-innovate-2024.png

Don't miss out on SAS Innovate - Register now for the FREE Livestream!

Can't make it to Vegas? No problem! Watch our general sessions LIVE or on-demand starting April 17th. Hear from SAS execs, best-selling author Adam Grant, Hot Ones host Sean Evans, top tech journalist Kara Swisher, AI expert Cassie Kozyrkov, and the mind-blowing dance crew iLuminate! Plus, get access to over 20 breakout sessions.

 

Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 5 replies
  • 474 views
  • 6 likes
  • 4 in conversation