I am trying to run the following macro in SAS Enterprise Guide version 7.1 which I found in Paper SDA-09, "A Macro for Calculating Summary Statistics on Left Censored Environmental Data using the Kaplan-Meier Method" authored by Dennis Beal. When I run the macro using my own data I get the following error message: "ERROR: Variable popn_num is not on file WORK.PEST_2017_RESULT_FORPROGRAM.” I'm a relative beginner is SAS coding and have not worked with macros much. Why am I getting this error? Here is what the 1st few records in my data file look like:
SAMPLE_PK | COMMOD | COMMTYPE | PESTCODE | chemical | RESULTS | HIT | CONCEN | LOD | CONUNIT |
1 | HY | RE | 011 | Captan | 0.5 | 0 | | 0.5 | M |
1 | HY | RE | 024 | Diazinon | 0.05 | 0 | | 0.05 | M |
1 | HY | RE | 032 | Diuron | 0.002 | 0 | | 0.002 | M |
1 | HY | RE | 042 | Azinphos methyl | 0.5 | 0 | | 0.5 | M |
1 | HY | RE | 052 | Malathion | 0.05 | 0 | | 0.05 | M |
1 | HY | RE | 057 | Parathion methyl | 0.05 | 0 | | 0.05 | M |
I would like to calculate the KM-mean for each chemical in this file.
Here is the code I am trying to run:
/*Before running the SAS code, some initial macros must be executed. The following macro OBSNVARS simply stores the number of records and variables in a data set into SAS macro variables.*/
%macro obsnvars(ds);
%* this macro returns the number of variables and observations from a data set;
%global dset nvars nobs;
%let dset=&ds;
%let dsid = %sysfunc(open(&dset));
%if &dsid %then %do;
%let nobs =%sysfunc(attrn(&dsid,NOBS));
%let nvars=%sysfunc(attrn(&dsid,NVARS));
%let rc = %sysfunc(close(&dsid));
%end;
%else
%put Open for data set &dset failed - %sysfunc(sysmsg());
%mend obsnvars;
/*The DS_NAMES2 macro sets a number of SAS data sets together.*/
%macro ds_names2(name, num);
%do zz = 1 %to #
&name&zz.
%end;
%mend ds_names2;
/*The following macro definitions are determining the number of chemicals that will be run and a SORTBY macro variable where additional variables can be added for sorting the data.*/
%let N_POPNS =1;
%let SORTBY = chemical;
/*The macro calc_km calculates the KM method using Eqns. 1 – 6. The data set b4 (I changed b4 to WORK.PEST_2017_RESULT_FORPROGRAM) contains the data, where the variable HIT = 0 for censored results (non-detects),
/*and HIT = 1 for uncensored detected results. The variable RESULTS is the reported measurement from the laboratory for detects or the reporting limit for non-detects.*/
%macro calc_km;
%do POPNUM = 1 %to &N_POPNS;
data one&POPNUM.;
set WORK.PEST_2017_RESULT_FORPROGRAM;
where popn_num = &POPNUM.; run;
%obsnvars(one&POPNUM.); %let TOTN = &nobs;
proc sort data=one&POPNUM.; by &SORTBY RESULTS;
proc summary data=one&POPNUM.;
var results;
where hit=0; ** nondetects only;
output out=nondets(drop=_type_ _freq_) n=nds; run;
%obsnvars(nondets); %let N_NONDETECTS = &nobs;
proc summary data=one&POPNUM.;
var results;
where HIT=1; ** detects only;
by &SORTBY results;
output out=distinct&POPNUM.(drop=_type_ _freq_) n=mi;
proc print data=distinct&POPNUM.; run;
%obsnvars(distinct&POPNUM.); %let N_DISTINCT_LEVELS = &nobs;
data hitsonly;
set one&POPNUM.;
where hit=1; run;
%obsnvars(hitsonly); %let N_DETECTS = &nobs;
%macro calc_ni;
proc transpose data=distinct&POPNUM. out=outt&POPNUM.; var results; run;
** detected distinct results only;
data a&POPNUM.;
set one&POPNUM.;
if _N_=1 then set outt&POPNUM.;
%do i = 1 %to &N_DISTINCT_LEVELS;
if results <= col&i. then N&i.=1; else N&i.=0;
if results = col&i. and hit=1 then M&i.=1; else M&i.=0;
%end; run;
proc summary data=a&POPNUM.;
var N1-N&N_DISTINCT_LEVELS. M1-M&N_DISTINCT_LEVELS.;
by &SORTBY col1-col&N_DISTINCT_LEVELS;
output out=calc_ni_out&POPNUM.(drop=_type_ _freq_)
sum=N1-N&N_DISTINCT_LEVELS. M1-M&N_DISTINCT_LEVELS.;
proc print data=calc_ni_out&POPNUM.; run;
proc summary data=a&POPNUM.;
var hit;
output out=hitsum&POPNUM.(drop=_type_ _freq_) sum=N_DETECTS;
data calc_cdf&POPNUM.;
set calc_ni_out&POPNUM.;
if _N_=1 then set hitsum&POPNUM.;
N_DISTINCT_DETECT_LEVELS = &N_DISTINCT_LEVELS;
CDF&N_DISTINCT_LEVELS = 1;
CDF0 = 0;
%do i = &N_DISTINCT_LEVELS %to 2 %by -1;
%let N_DISTINCT_LEVELS_M1 = %eval(&i. - 1);
CDF&N_DISTINCT_LEVELS_M1 = cdf&i. * (n&i. - m&i.) / n&i.;
%end;
KM_MEAN=0;
%do i = 1 %to &N_DISTINCT_LEVELS; ** calculate KM mean ;
%let im1 = %eval(&i. - 1);
KM_MEAN = KM_MEAN + col&i. * (cdf&i. - cdf&im1.);
%end;
KM_VAR=0;
%do i = 1 %to &N_DISTINCT_LEVELS; ** calculate KM standard deviation;
%let im1 = %eval(&i. - 1);
KM_VAR = KM_VAR + (col&i. - km_mean)**2 * (cdf&i. - cdf&im1.);
%end;
KM_STD = sqrt(km_var);
%do j = 1 %to &N_DISTINCT_LEVELS - 1; ** calculate KM SE of the mean;
a&j. = 0;
%do i = 1 %to &j;
%let ip1 = %eval(&i. + 1);
a&j. = a&j. + (col&ip1. - col&i.) * cdf&i.;
%end;
%end;
sum = 0;
%do i = 1 %to &N_DISTINCT_LEVELS - 1;
%let ip1 = %eval(&i. + 1);
sum = sum + a&i.**2 * m&ip1. / (n&ip1. * (n&ip1. - m&ip1.));
%end;
if n_detects > 1 then KM_STDERR2 = n_detects * sum / (n_detects - 1);
KM_STDERR = sqrt(km_stderr2);
drop sum; run;
6
%mend calc_ni;
%if &N_DISTINCT_LEVELS > 1 and &N_NONDETECTS > 0 %then %do;
%calc_ni
%end;
%end;
data allkm;
set %ds_names2(calc_cdf, &N_POPNS);
keep &SORTBY n_distinct_detect_levels km_mean km_var km_std km_stderr; run;
proc print data=allkm; run;
%mend calc_km;
%calc_km