BookmarkSubscribeRSS Feed
☑ This topic is solved. Need further help from the community? Please sign in and ask a new question.
LB
Quartz | Level 8 LB
Quartz | Level 8

OK I have a macro that does work below  however I am looking to execute  a macro within a data step as if this works-there will be other  computations

This macro will designate if a patient has a disease based on ICD 10 code groupings - 

Any assistance appreciated. TIA

 

What works:



data fake_patients;
    length subject $5 ICD1-ICD10 $7;
    array icd_codes[10] $ ICD1-ICD10;

    /* List of ICD-10 codes for various diseases */
    array pneumonia_codes[10] $7 _temporary_ ('J12.0', 'J12.1', 'J12.2', 'J12.3', 'J12.4', 
                                              'J12.5', 'J12.8', 'J12.9', 'J13', 'J14');
    array diabetes_codes[8] $7 _temporary_ ('E10.9', 'E11.9', 'E13.9', 'E14.9', 
                                            'E10.65', 'E11.65', 'E13.65', 'E14.65');
    array hypertension_codes[10] $7 _temporary_ ('I10', 'I11.0', 'I11.9', 'I12.0', 
                                                 'I12.9', 'I13.0', 'I13.10', 'I13.11', 
                                                 'I13.2', 'I15.0');
    array asthma_codes[10] $7 _temporary_ ('J45.20', 'J45.21', 'J45.22', 'J45.30', 
                                           'J45.31', 'J45.32', 'J45.40', 'J45.41', 
                                           'J45.42', 'J45.50');
    array other_codes[10] $7 _temporary_ ('A00', 'A01', 'A02', 'A03', 'A04', 
                                          'A05', 'A06', 'A07', 'A08', 'A09');
    
    /* Create 50 fake patients */
    do subject_id = 1 to 50;
        subject = cats('S', put(subject_id, z3.));

        /* Assign random ICD-10 codes to each patient */
        do i = 1 to 10;
            /* Randomly choose a disease category */
            disease_category = rand('integer', 1, 5);
            
            select (disease_category);
                when (1) icd_codes[i] = pneumonia_codes[rand('integer', 1, dim(pneumonia_codes))];
                when (2) icd_codes[i] = diabetes_codes[rand('integer', 1, dim(diabetes_codes))];
                when (3) icd_codes[i] = hypertension_codes[rand('integer', 1, dim(hypertension_codes))];
                when (4) icd_codes[i] = asthma_codes[rand('integer', 1, dim(asthma_codes))];
                when (5) icd_codes[i] = other_codes[rand('integer', 1, dim(other_codes))];
            end;
        end;

        output;
    end;
run;




data diseases;
    length disease $20 DX1-DX10 $7;

    /* Pneumonia and its ICD-10 codes */
    disease = "Pneumonia";
    DX1 = 'J12.0'; DX2 = 'J12.1'; DX3 = 'J12.2'; DX4 = 'J12.3';
    DX5 = 'J12.4'; DX6 = 'J12.5'; DX7 = 'J12.8'; DX8 = 'J12.9';
    DX9 = 'J13'; DX10 = 'J14';
    output;

    /* Diabetes and its ICD-10 codes */
    disease = "Diabetes";
    DX1 = 'E10.9'; DX2 = 'E11.9'; DX3 = 'E13.9'; DX4 = 'E14.9';
    DX5 = 'E10.65'; DX6 = 'E11.65'; DX7 = 'E13.65'; DX8 = 'E14.65';
    DX9 = ''; DX10 = '';
    output;

    /* Hypertension and its ICD-10 codes */
    disease = "Hypertension";
    DX1 = 'I10'; DX2 = 'I11.0'; DX3 = 'I11.9'; DX4 = 'I12.0';
    DX5 = 'I12.9'; DX6 = 'I13.0'; DX7 = 'I13.10'; DX8 = 'I13.11';
    DX9 = 'I13.2'; DX10 = 'I15.0';
    output;

    /* Asthma and its ICD-10 codes */
    disease = "Asthma";
    DX1 = 'J45.20'; DX2 = 'J45.21'; DX3 = 'J45.22'; DX4 = 'J45.30';
    DX5 = 'J45.31'; DX6 = 'J45.32'; DX7 = 'J45.40'; DX8 = 'J45.41';
    DX9 = 'J45.42'; DX10 = 'J45.50';
    output;
run;


%macro sendR2D2(data=, var=);
   proc sort data=&data(keep=&var) out=values nodupkey;
       by &var;
   run;

   data _null_;
      set values end=last;
      call symputx('DIS'||left(_n_), &var);
      if last then call symputx('countx', _n_, 'g');
   run;

%put _local_;

%do i=1 %to &countx;

proc sql noprint;
    select  catx(',', compress("'"||DX1||"'"), compress("'"||DX2||"'"), compress("'"||DX3||"'"), 
compress("'"||DX4||"'"),compress("'"||DX5||"'"),compress("'"||DX6||"'"), compress("'"||DX7||"'"), compress("'"||DX8||"'"), compress("'"||DX9||"'"), compress("'"||DX10||"'")) 
    into :DX 
    from diseases where disease="&&DIS&i";
quit;

%put &DX;

   
 data &&DIS&i (Keep= subject &&DIS&i);
 set work.fake_patients;
array icd_codes[10] $ ICD1-ICD10;

&&DIS&i=0;
do j = 1 to dim(icd_codes);
        if icd_codes[j] in  (&DX) then do;
&&DIS&i=1;
leave;
 
        end;
    end;
	
run;


		
%end;

%mend;

%sendR2D2(data=diseases,var=disease);


Data want2;
merge work.fake_patients work.asthma work.diabetes work.hypertension work.pneumonia;
by subject;
run;

What doesn't work and need to resolve:


data fake_patients;
    length subject $5 ICD1-ICD10 $7;
    array icd_codes[10] $ ICD1-ICD10;

    /* List of ICD-10 codes for various diseases */
    array pneumonia_codes[10] $7 _temporary_ ('J12.0', 'J12.1', 'J12.2', 'J12.3', 'J12.4', 
                                              'J12.5', 'J12.8', 'J12.9', 'J13', 'J14');
    array diabetes_codes[8] $7 _temporary_ ('E10.9', 'E11.9', 'E13.9', 'E14.9', 
                                            'E10.65', 'E11.65', 'E13.65', 'E14.65');
    array hypertension_codes[10] $7 _temporary_ ('I10', 'I11.0', 'I11.9', 'I12.0', 
                                                 'I12.9', 'I13.0', 'I13.10', 'I13.11', 
                                                 'I13.2', 'I15.0');
    array asthma_codes[10] $7 _temporary_ ('J45.20', 'J45.21', 'J45.22', 'J45.30', 
                                           'J45.31', 'J45.32', 'J45.40', 'J45.41', 
                                           'J45.42', 'J45.50');
    array other_codes[10] $7 _temporary_ ('A00', 'A01', 'A02', 'A03', 'A04', 
                                          'A05', 'A06', 'A07', 'A08', 'A09');
    
    /* Create 50 fake patients */
    do subject_id = 1 to 50;
        subject = cats('S', put(subject_id, z3.));

        /* Assign random ICD-10 codes to each patient */
        do i = 1 to 10;
            /* Randomly choose a disease category */
            disease_category = rand('integer', 1, 5);
            
            select (disease_category);
                when (1) icd_codes[i] = pneumonia_codes[rand('integer', 1, dim(pneumonia_codes))];
                when (2) icd_codes[i] = diabetes_codes[rand('integer', 1, dim(diabetes_codes))];
                when (3) icd_codes[i] = hypertension_codes[rand('integer', 1, dim(hypertension_codes))];
                when (4) icd_codes[i] = asthma_codes[rand('integer', 1, dim(asthma_codes))];
                when (5) icd_codes[i] = other_codes[rand('integer', 1, dim(other_codes))];
            end;
        end;

        output;
    end;
run;




data diseases;
    length disease $20 DX1-DX10 $7;

    /* Pneumonia and its ICD-10 codes */
    disease = "Pneumonia";
    DX1 = 'J12.0'; DX2 = 'J12.1'; DX3 = 'J12.2'; DX4 = 'J12.3';
    DX5 = 'J12.4'; DX6 = 'J12.5'; DX7 = 'J12.8'; DX8 = 'J12.9';
    DX9 = 'J13'; DX10 = 'J14';
    output;

    /* Diabetes and its ICD-10 codes */
    disease = "Diabetes";
    DX1 = 'E10.9'; DX2 = 'E11.9'; DX3 = 'E13.9'; DX4 = 'E14.9';
    DX5 = 'E10.65'; DX6 = 'E11.65'; DX7 = 'E13.65'; DX8 = 'E14.65';
    DX9 = ''; DX10 = '';
    output;

    /* Hypertension and its ICD-10 codes */
    disease = "Hypertension";
    DX1 = 'I10'; DX2 = 'I11.0'; DX3 = 'I11.9'; DX4 = 'I12.0';
    DX5 = 'I12.9'; DX6 = 'I13.0'; DX7 = 'I13.10'; DX8 = 'I13.11';
    DX9 = 'I13.2'; DX10 = 'I15.0';
    output;

    /* Asthma and its ICD-10 codes */
    disease = "Asthma";
    DX1 = 'J45.20'; DX2 = 'J45.21'; DX3 = 'J45.22'; DX4 = 'J45.30';
    DX5 = 'J45.31'; DX6 = 'J45.32'; DX7 = 'J45.40'; DX8 = 'J45.41';
    DX9 = 'J45.42'; DX10 = 'J45.50';
    output;
run;

%macro sendR2D3 (data=, var=);
   proc sort data=&data(keep=&var) out=values nodupkey;
       by &var;
   run;

   data _null_;
      set values end=last;
      call symputx('DIS'||left(_n_), &var);
      if last then call symputx('countx', _n_, 'g');
   run;

%put _local_;

%do i=1 %to &countx;

proc sql noprint;
    select  catx(',', compress("'"||DX1||"'"), compress("'"||DX2||"'"), compress("'"||DX3||"'"), 
compress("'"||DX4||"'"),compress("'"||DX5||"'"),compress("'"||DX6||"'"), compress("'"||DX7||"'"), compress("'"||DX8||"'"), compress("'"||DX9||"'"), compress("'"||DX10||"'")) 
    into :DX 
    from diseases where disease="&&DIS&i";
quit;

%put &DX;

   
array icd_codes[10] $ ICD1-ICD10;

&&DIS&i=0;
do j = 1 to dim(icd_codes);
        if icd_codes[j] in  (&DX) then do;
&&DIS&i=1;
leave;
 
        end;
    end;
	
run;


		
%end;

%mend;





Data want3;
set work.fake_patients;
%sendR2D3(data=diseases,var=disease);
run;
1 ACCEPTED SOLUTION

Accepted Solutions
Tom
Super User Tom
Super User

Let's take your code:

data want3;
  set work.fake_patients;
%sendR2D3(data=diseases,var=disease);
run;

While SAS is compiling that code it first sees the DATA statement. So it knows you want to start a new step.  So if you had previously had something like a PROC or DATA step without an terminal command like RUN or QUIT ( or DATALINES for a data step) then that previous steps compilation is finished and the step runs.  Now it starts trying to build the DATA step.

 

Then it sees the SET statement.  So it knows to add the variables from FAKE_PATIENTS into the data vector for this data step.

 

Then the macro pre-processor sees the macro call.  So it takes over and begins running the macro.  Sending any statements it emits back to the SAS compiler to evaluate.  So the first statement that the macro emits is a PROC SORT statement.  So the SAS compiler sees that and knows that the definition of the DATA step is complete. So the data step runs.

 

Next the macro emits the rest of the PROC SORT step. So that now runs.  Then it emits a DATA step, so that runs.  Then it emits the values of the macro variables.  Which are not valid outside of a DATA step so the SAS compiler generates an error.

 

Finally, after the macro has finished, the SAS compiler sees the RUN statement in the program source.

View solution in original post

13 REPLIES 13
ballardw
Super User

If a macro contains data step code or procedure then when the DATA or Proc statement is encountered it ends the data step.

What would expect to have happen with:

data junk; 
   set sashelp.class;
   ratio = weight/height;
proc sort data=sashelp.class
   out=dummy;
  by sex;
run;

If your answer is not something like "when the data junk step encounters the Proc sort statement then an implied run is used and the data set junk is created with the added variable using the sashelp.class data. Then the sort executes."

 

Other statements may have interesting interactions depending on exactly which class of statement is generated.

 

You also have a timing issue about when MACRO variables are resolved in data step code. They are resolved at compilation time.

 

I'm too tired at the moment to spend anytime trying to determine what your "need to resolve" code is attempting to do, especially since you didn't even provide a brief description of intent.

LB
Quartz | Level 8 LB
Quartz | Level 8

I do apologize that description was way too terse. In short in the 2nd dataset -diseases -this will be a dynamic dataset that will be updated probably monthly or quarterly with at least 50 disease categories.  While the fake_patients dataset has 50 patients and 10 ICD codes in reality - will be getting all the diagnosis codes in a horizontal fashion  with 200,000 actual patients.  The first example already works and if I have to I will use that,  However if I can get the 2nd example to work it would be more efficient. 

Quentin
Super User

Agree with @ballardw 's analysis.  

 

Part of the hard work of asking an answerable question is to build a *small* clear example that illustrates the problem.  Typically this can be done with a sample dataset of 5-10 records, and just one or two steps.  But doing that requires you to simplify your problem dramatically, until you have a minimal reproducible example (MRE).  Many times as you try to construct an MRE, you will find the answer.  

 

If you construct an MRE and cannot find the answer, please post it.  And also, in the context of the MRE, please don't say that it 'doesn't work'.  Instead, tell us whether you have errors/warnings in the log, or if the log is clean but you are getting surprising results.

The Boston Area SAS Users Group (BASUG) is hosting our in person SAS Blowout on Oct 18!
This full-day event in Cambridge, Mass features four presenters from SAS, presenting on a range of SAS 9 programming topics. Pre-registration by Oct 15 is required.
Full details and registration info at https://www.basug.org/events.
ballardw
Super User

If this is supposed to be some sort of look up to tell if a specific instance of DX is a specific disease related ICD code then perhaps a custom format would be the way to look things up.

 

proc format 
value code2disease
'J12.0', 'J12.1', 'J12.2', 'J12.3', 'J12.4',
'J12.5', 'J12.8', 'J12.9', 'J13'  , 'J14'      = "Pneumonia"
/* repeat for others of interest*/
other='Not listed'
;

run;

And use in a data step might look like

 

if put(dx[i], code2disease.) ='Pneumonia' then <do what you want when found> 

Or a similar informat.

One thing about the format/ informat approach would be that you can have varying numbers of codes you don't have to try to keep track, the SAS use of the format maintains the list size and uses it automaticaly.

LB
Quartz | Level 8 LB
Quartz | Level 8

Interesting idea. I could output a dynamic  script with new ICD codes. I'll have to think about that.

Tom
Super User Tom
Super User

When running macros make sure that the MPRINT option is turned on so you can see in the SAS log the SAS code that the macro generated.

 

This part of you macro makes no sense:

%do i=1 %to &countx;
proc sql noprint;
    select  catx(',', compress("'"||DX1||"'"), compress("'"||DX2||"'"), compress("'"||DX3||"'"), 
compress("'"||DX4||"'"),compress("'"||DX5||"'"),compress("'"||DX6||"'"), compress("'"||DX7||"'"), compress("'"||DX8||"'"), compress("'"||DX9||"'"), compress("'"||DX10||"'")) 
    into :DX 
    from diseases where disease="&&DIS&i";
quit;
%put &DX;
array icd_codes[10] $ ICD1-ICD10;
&&DIS&i=0;
do j = 1 to dim(icd_codes);
    if icd_codes[j] in  (&DX) then do;
        &&DIS&i=1;
        leave;
    end;
 end;
run;
%end;

When the macro variable I has the value of 1 you generate a PROC SQL step.  Then immediately generate an ARRAY statement.  You cannot have an ARRAY statement outside of a data step.  Even if you remove the PROC SQL code it does not make any sense to the have the ARRAY statement inside the %DO loop.  You only want to have one ARRAY statement for the array named ICD_CODES.

LB
Quartz | Level 8 LB
Quartz | Level 8

Thank you for your response. But then if I write something to this effect:

 

%macro sendR2D3(data=, var=);
   proc sort data=&data(keep=&var) out=values nodupkey;
       by &var;
   run;

   data _null_;
      set values end=last;
      call symputx('DIS'||left(_n_), &var);
      if last then call symputx('countx', _n_, 'g');
   run;

%put _local_;

%do i=1 %to &countx;

&&DIS&i=0;

		
%end;

%mend;




Data want3;
set work.fake_patients;
%sendR2D3(data=diseases,var=disease);
run;

and I run it I get a 'Statement is not valid or it is used out of proper order.' 

I can see that it does  generate the macro variable though -EX NOTE: Line generated by the macro variable "DIS1".
133 Asthma

 

If I just run: 

Data want3;
set work.fake_patients;
asthma=0;
run;

That is fine. SO it is my lack of understanding  of what I can/can't do with macro in respects to running it within a data step.  Thank you, 

Tom
Super User Tom
Super User

If you are having trouble visualizing what CODE the macro will generate use the MPRINT option and you SEE it in the LOG.

 

That macro will first run a PROC step, then a DATA step.  Then it will EMIT the values of a series of macro variables.  So if the values of variable named DISEASE in the dataset named DISEASES has valid SAS code like:

data diseases;
  disease='data x; set y; run;' ;
run;

Then your macro makes some sense.

 

But if instead DISEASE just have values like: A21, B54 then it makes no sense to have the macro generate code like:

proc sort; .....
data ....
A21;
B54;

The SAS language complier is not going to understand those last two statements.

LB
Quartz | Level 8 LB
Quartz | Level 8

Ok-Thank you for that info. I do have the MPRINT option  on- 

 

When I attempt to execute  the macro 

 

It would go like this:

 

data set3;

set fake_patients;

 

---macro statement here---

 

macro renders as asthma=0;

 

run;

 

so to me -it appears that the macro var is defined within the proc step. But obviously not!   

Tom
Super User Tom
Super User

Let's take your code:

data want3;
  set work.fake_patients;
%sendR2D3(data=diseases,var=disease);
run;

While SAS is compiling that code it first sees the DATA statement. So it knows you want to start a new step.  So if you had previously had something like a PROC or DATA step without an terminal command like RUN or QUIT ( or DATALINES for a data step) then that previous steps compilation is finished and the step runs.  Now it starts trying to build the DATA step.

 

Then it sees the SET statement.  So it knows to add the variables from FAKE_PATIENTS into the data vector for this data step.

 

Then the macro pre-processor sees the macro call.  So it takes over and begins running the macro.  Sending any statements it emits back to the SAS compiler to evaluate.  So the first statement that the macro emits is a PROC SORT statement.  So the SAS compiler sees that and knows that the definition of the DATA step is complete. So the data step runs.

 

Next the macro emits the rest of the PROC SORT step. So that now runs.  Then it emits a DATA step, so that runs.  Then it emits the values of the macro variables.  Which are not valid outside of a DATA step so the SAS compiler generates an error.

 

Finally, after the macro has finished, the SAS compiler sees the RUN statement in the program source.

LB
Quartz | Level 8 LB
Quartz | Level 8

Got it. Makes sense now. Thank you.

 

 

Tom
Super User Tom
Super User

Here is an order of operations that makes sense for your example:

 

First process the dataset and generate the assignment statements into a FILE. Then use %INCLUDE to execute the statements in the FILE where you want them to appear.

 

So your macro might look like this:

%macro sendR2D3(data=, var=, file=);
proc sort data=&data(keep=&var) out=values nodupkey;
  by &var;
run;

data _null_;
  set values ;
  file &file;
  put &var '=0;' ;
run;
%mend ;

And you could then use it like this:

filename code temp;
%sendR2D3(data=diseases,var=disease,file=code);
data want3;
  set work.fake_patients;
%include code / source2;
run;
LB
Quartz | Level 8 LB
Quartz | Level 8
This is very helpful in understanding how it all works. Thank you.

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!

Submit your idea!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 13 replies
  • 1265 views
  • 4 likes
  • 4 in conversation