Solved: Re: Do loop in macro

David_Billa · Posted 07-08-2021 04:38 AM

In the below code, value of totobs resolves to 2. But I'm getting the value of mac=2 in both the observations. Instead of mac=1 in first record and mac=2 in second record. Any help?

%macro chk;
data Calculation;
set uu;
%do i=1 %to &totobs;
mac=cats('A',&i);
value=&&A&i.;
%end;
output;
run;
%mend;
%chk;

%put &A1.;
%put &A2.;

Tom · Posted 07-08-2021 11:41 AM

Not sure what all of the other loop do loops are for but it looks like you have this metadata on rules.

data rule;
  length cre $32 rule $1000;
  input cre rule;
cards4;
[IC-A_PoC_ACT] ('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n
[IC-A_GoC_ACT] ('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n
;;;;

Which you could use to create some SAS code like this:

filename code temp;
data _null_;
  set rule;
  file code;
  if _n_>1 then put 'else ' @;
  put 'if ' cre=$quote. 'then do;'
    / '  mac="A' _n_ '";'
    / '  value=' rule ';'
    / 'end;'
  ;
run;

So that you get this code:

if cre="[IC-A_PoC_ACT]" then do;
  mac="A1 ";
  value=('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n ;
end;
else if cre="[IC-A_GoC_ACT]" then do;
  mac="A2 ";
  value=('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n ;
end;

Which you could then apply to your data.

options validvarname=any;
data cre_acc ;
  length amount 8 cre acc $32 ;
  input amount cre acc ;
cards;
10 [IC-A_GoC_ACT] [A_PoC_ACT]
50 [IC-A_GoC_ACT] AU_BENEFITS_AS_IC
20 [IC-A_GoC_ACT] AU_BENEFITS
20 [IC-A_GoC_ACT] AU_BENEFITS_NZC
10 [IC-A_PoC_ACT] [A_PoC_ACT]
20 [IC-A_PoC_ACT] AU_BENEFITS_AS_IC
20 [IC-A_PoC_ACT] AU_BENEFITS
;

proc transpose data=cre_acc out=cre_acc_wide;
  by CRE;
  id ACC;
  var amount;
run;

data want;
  set cre_acc_wide;
%include code / source2;
run;

To get the result:

                                   [A_PoC_    AU_BENEFITS_                   AU_BENEFITS_
Obs         cre          _NAME_      ACT]         AS_IC       AU_BENEFITS         NZC        mac    value

 1     [IC-A_GoC_ACT]    amount       10           50              20             20         A2      500
 2     [IC-A_PoC_ACT]    amount       10           20              20              .         A1       10

View solution in original post

Dani_Gor · Posted 07-08-2021 04:49 AM

In the program written the output statement is written outside the do loop. For this reason the code suggested looks like this one:

data Calculation;
set uu;
mac=cats('A',1);
mac=cats('A',2);
...
...
output;
run;

for this reason the mac column is updated before sending in output the row corresponding to mac=A1.
I would suggest to place the output statement inside the macro program if the expected result should contain one row for mac=A1 and one for mac=A2 for every row in dataset uu:

%macro chk;
data Calculation;
set uu;
%do i=1 %to &totobs;
mac=cats('A',&i);
value=&&A&i.;
output;
%end;
run;
%mend;
%chk;

%put &A1.;
%put &A2.;

David_Billa · Posted 07-08-2021 04:58 AM

I'm getting more than one row for Mac=A1 and Mac=A2 if I put the output
statement inside the macro

Kurt_Bremser · Posted 07-08-2021 05:04 AM

Please supply dataset uu in usable form (data step with datalines, DO NOT SKIP THIS!), and what you expect to get as a result.

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

David_Billa · Posted 07-08-2021 05:35 AM

@Kurt_Bremser @Dani_Gor This is my code. In the macro CHK if I Output Statement inside the do Loop I'm getting four rows. Desired result is only two rows as shown in excepted Output.

data i;
    length ACC $50.;
    CRE="[IC-A_PoC_ACT]";
    ACC="[A_PoC_ACT]";
    amount=10;
    output;
    CRE="[IC-A_PoC_ACT]";
    ACC="AU_BENEFITS_AS_IC";
    amount=20;
    output;
    CRE="[IC-A_PoC_ACT]";
    ACC="AU_BENEFITS";
    amount=20;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="[A_PoC_ACT]";
    amount=10;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS_AS_IC";
    amount=50;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS";
    amount=20;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS_NZC";
    amount=20;
    output;
run;

proc Sort data=i;
    by CRE;
run;

Proc Transpose data=i out=i1;
    by CRE;
    id ACC;
    var amount;
run;

data rule;
    length RULE $1000.;
    CRE="[IC-A_PoC_ACT]";
    RULE="('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n";
    output;
    CRE="[IC-A_GoC_ACT]";
    RULE="('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n";
    output;
run;

proc Sql;
    create table uu as select i1.*,rule.rule from i1,rule where i1.CRE=rule.CRE;
quit;

data yy;
    set uu;
    amount=resolve(rule);
run;

data xxxx;
    set uu nobs=n;

    if missing(rule) then
        rule= '.';
    mac=cats('A',_n_);
    call symput(trim(mac),trim(rule));
    call symputx('totobs',n);
run;

%put &totobs;

%macro chk;

    data BRACKET_VAL_CAL;
        set uu;

        %do i=1 %to &totobs;
            mac=cats('A',&i);
            value=&&A&i.;
            output;
        %end;
    run;

%mend;

%chk;
%put &A1.;
%put &A2.;

Excepted Output (gave only last three variables which are important):

RULE                                                                                                                                     mac    value
('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n                                             A1     10
('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n      A2      500

Dani_Gor · Posted 07-08-2021 05:53 AM

Probably a possible solution involving macro variables could be:

%macro chk;

data BRACKET_VAL_CAL;
set uu;

%do i=1 %to &totobs;
mac=cats('A',&i);
value=&&A&i.;
if _N_=&i then output;
%end;
run;
%mend;

or

%macro test;
data BRACKET_VAL_CAL;
set uu;
mac=cats('A',_N_);
%do i=1 %to &totobs;
if _N_=&i then value=&&A&i.;
%end;
run;
%mend test;

%test

Kurt_Bremser · Posted 07-08-2021 06:01 AM

You need to move the SET statement into the macro loop, and use the POINT= option to read the correct observation:

%macro chk;
data BRACKET_VAL_CAL;
%do i=1 %to &totobs;
  _n_ = &i;
  set uu point=_n_;
  mac=cats('A',&i);
  value=&&A&i.;
  output;
%end;
stop;
run;
%mend;
%chk

Also note the necessity of the STOP statement to prevent infinite looping of the data step.

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

David_Billa · Posted 07-08-2021 07:16 AM

@Kurt_Bremser thank you! you're trying to do set Statement in a loop which may cause performance issue with large volume of data. Is there a way to tackle it using any other apporach? Using Arrays or something similar might helps?

Kurt_Bremser · Posted 07-08-2021 07:27 AM

Since the SET with POINT= will iterate through the dataset sequentially, the read should not be much of a problem, performancewise; test it.

If you have to deal with really large data in this way, the sheer amount of macro variables might overwhelm your memory settings. In this case, I would prepare the different formula patterns as branches in a SELECT block, which are then indexed by a suitable variable.

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

David_Billa · Posted 07-08-2021 07:34 AM

@Kurt_Bremser thanks! Pardon me for asking the same question. Can we tackle this macro using Arrays instead of SET with POINT?

Kurt_Bremser · Posted 07-08-2021 08:09 AM

The issue here is that:

You cannot modify the code once a data step starts executing, so you need to supply the dynamic code to the data step compiler while it does its work.

One way is the way I gave you, by writing a separate piece of data step code for every observation; this will end up with a lot of code, and the time needed to compile the code.

If, OTOH, you have a quite finite pattern of formulas that you need to apply, you can code them fixed into the data step and then, while it executes, dynamically select which branch to take. Something like

data want;
set have;
select (indicator);
  when (1) value = ('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n;
  when (2) value = ('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n;
  otherwise putlog "Wrong Indicator!";
end;
run;

This would greatly reduce the code, as you only need number-of-formulas vs. number-of-observations, and it would avoid the possible issue with the POINT= option. And you do not need a macro any longer.

Maxims of Maximally Efficient SAS Programmers
How to convert datasets to data steps
The macro for direct download as ZIP
How to post code
Please vote for Provide Sequential Search Capability for Hash Objects
How to deal with locked files on UNIX

David_Billa · Posted 07-08-2021 09:12 AM

@Kurt_Bremser Formulas are not fixed. It will change often. In real life those formulas will be coming from excel which we will use it after importing those excel.

On a separate note, do I need to use your proposed data step code instread of macro?

Astounding · Posted 07-08-2021 07:06 AM

It is too soon to correct the programming. First, you need to correct the plan.

You start with 2 observations, and you want to end up with 2 observations.

You start with two key variables, MAC and VALUE. And you are going to end up with MAC and VALUE on each observation. How do you propose that MAC and VALUE will receive two values on the same observation? Do you want to create MAC1, MAC2, VALUE1 and VALUE2? There are decisions to be made first, before programming can be effective.

Tom · Posted 07-08-2021 10:21 AM

In general I find it much easier to generate the code from the data directly using the power of the PUT statement instead of making things harder by trying to store data into macro variables.

Trivial example say you have a metadata table with two variables TYPE and EXPRESSION that you want to use to calculate the value of NEWVAR differently based on the value of TYPE. So you might use code like this to generate a series of IF/THEN statements.

filename code temp;
data _null_;
  set metadata;
  file code;
  if _n_>1 then put 'else ' @;
  put 'if ' type= 'then newvar=' expression ';' ;
run;

Which you could then use in another data step to actually calculate NEWVAR.

data want;
  set have;
%include code / source2;
run;

But I have no idea what type of data you have (including any "code" that is in data) nor what you intend to do with it.

Please show an example of what you are actually doing.

David_Billa · Posted 07-08-2021 11:04 AM

@Tom This is my code and it is producing the desired result as given below. My question is to trying identify the alternate approach instead of using SET with POINT in the macro Chk. Data which you seeing in the program may come from excel in the future and it will have many rules (look at data step 'rule'). So I don't want to use CASE WHEN as well.

data i;
    length ACC $50.;
    CRE="[IC-A_PoC_ACT]";
    ACC="[A_PoC_ACT]";
    amount=10;
    output;
    CRE="[IC-A_PoC_ACT]";
    ACC="AU_BENEFITS_AS_IC";
    amount=20;
    output;
    CRE="[IC-A_PoC_ACT]";
    ACC="AU_BENEFITS";
    amount=20;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="[A_PoC_ACT]";
    amount=10;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS_AS_IC";
    amount=50;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS";
    amount=20;
    output;
    CRE="[IC-A_GoC_ACT]";
    ACC="AU_BENEFITS_NZC";
    amount=20;
    output;
run;

proc Sort data=i;
    by CRE;
run;

Proc Transpose data=i out=i1;
    by CRE;
    id ACC;
    var amount;
run;

data rule;
    length RULE $1000.;
    CRE="[IC-A_PoC_ACT]";
    RULE="('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n";
    output;
    CRE="[IC-A_GoC_ACT]";
    RULE="('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n";
    output;
run;

proc Sql;
    create table uu as select i1.*,rule.rule from i1,rule where i1.CRE=rule.CRE;
quit;

data yy;
    set uu;
    amount=resolve(rule);
run;

data xxxx;
    set uu nobs=n;

    if missing(rule) then
        rule= '.';
    mac=cats('A',_n_);
    call symput(trim(mac),trim(rule));
    call symputx('totobs',n);
run;

%put &totobs;

%macro chk;
data BRACKET_VAL_CAL;
%do i=1 %to &totobs;
  _n_ = &i;
  set uu point=_n_;
  mac=cats('A',&i);
  value=&&A&i.;
  output;
%end;
stop;
run;
%mend;
%chk;

%put &A1.;
%put &A2.;

Excepted Output (gave only last three variables which are important):

RULE mac value
('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n)/'AU_BENEFITS'n A1 10
('[A_PoC_ACT]'n*'AU_BENEFITS_AS_IC'n*'AU_BENEFITS_NZC'n)/'AU_BENEFITS'n A2 500

SAS Innovate 2025: Call for Content

Classroom Training Available!