The macro first uses the complete data to obtain initial estimates. Then then the macro impute the survival times of the censored observations using EM algorithm as in Buckley-James method . The macro uses GLM for obtaining initial parameter estimate as well as parameter estimate at each iteration. If the GLM is ran outside of the macro, the GLM code works well, and the parameter estimates in the ODS tables are created, but when the macro is ran it says 'ParameterEstimates' was not created. Any help is appreciated.
%MACRO BJ_PREDICTION(data_in, y_var, censor_var, x_vars, cat_vars, new_data,
max_iter=50, tolerance=1E-6, n_bootstrap=100);
/* Step 0: Get number of observations for bootstrapping. */
%LOCAL n_obs;
PROC SQL NOPRINT;
SELECT COUNT(*) INTO :n_obs FROM &data_in;
QUIT;
/* Step 0.5: Create an empty dataset to store coefficients from each bootstrap sample */
PROC SQL;
CREATE TABLE bootstrap_coeffs (
boot_iter NUM,
varname CHAR(32),
estimate NUM
);
QUIT;
/* Start the bootstrap loop */
%DO boot_iter = 1 %TO &n_bootstrap;
%PUT NOTE: Starting bootstrap iteration &boot_iter. of &n_bootstrap.;
/* Take a sample with replacement for bootstrapping */
PROC SURVEYSELECT DATA=&data_in OUT=boot_sample METHOD=SRS SAMPSIZE=&n_obs REPS=1 SEED=&boot_iter;
RUN;
%LOCAL iter_count beta_intercept_old has_converged;
%LET iter_count = 0;
%LET has_converged = 0;
/* Initial GLM on uncensored data for starting values */
/* Check if there are any uncensored observations to prevent GLM failure */
DATA _initial_data_uncer;
SET boot_sample(WHERE=(&censor_var=1));
RUN;
proc print data=_initial_data_uncer;
title "Contents of _initial_data_uncer";
run;
title;
ODS OUTPUT ParameterEstimates=initial(RENAME=(parameter=varname Estimate=estimate));
proc glm data=_initial_data_uncer noprint ;
class gender;
model time= age height gender/solution;
run;
proc print data=initial;
run;
/* Check if the dataset is not empty before running GLM */
%LOCAL n_uncensored;
PROC SQL NOPRINT;
SELECT COUNT(*) INTO :n_uncensored FROM _initial_data_uncer;
QUIT;
%PUT &n_uncensored;
%IF &n_uncensored > 0 %THEN %DO;
/* First, specify the ODS statement to capture the output */
*ODS OUTPUT ParameterEstimates=initial_betas(RENAME=(_NAME_=varname Estimate=estimate));
ODS OUTPUT ParameterEstimates=initial_betas(RENAME=(Parameter=varname Estimate=estimate));
PROC GLM DATA=_initial_data_uncer NOPRINT;
%IF "%TRIM(&cat_vars)" NE "" %THEN %DO;
CLASS &cat_vars;
%END;
MODEL &y_var = %TRIM(&x_vars) %TRIM(&cat_vars)/solution;
RUN;
QUIT;
/* Turn off the ODS output capture */
ODS OUTPUT CLOSE;
/* Read initial coefficients into macro variables */
DATA _NULL_;
SET initial_betas;
IF varname = '_Intercept_' THEN CALL SYMPUT('beta_intercept_old', put(estimate, 16.10));
ELSE CALL SYMPUT('beta_' || compress(varname) || '_old', put(estimate, 16.10));
RUN;
%END;
%ELSE %DO;
%PUT NOTE: No uncensored observations in bootstrap sample, skipping GLM for initial values.;
%LET beta_intercept_old = 0;
%END;
/* Inner loop for Buckley-James iteration on the bootstrap sample */
%DO %WHILE(&iter_count < &max_iter AND &has_converged = 0);
%LET iter_count = %EVAL(&iter_count + 1);
%PUT NOTE: Inside Buckley-James iterative loop, iteration &iter_count..;
/* Step 2: Calculate linear predictor and residuals */
DATA temp_data_&boot_iter;
SET boot_sample;
_Xb_ = %SYSCALL(resolve('&beta_intercept_old'));
%DO i=1 %TO %SYSFUNC(COUNTW(&x_vars));
%LET var = %SCAN(&x_vars, &i);
_Xb_ = _Xb_ + %SYSCALL(resolve("&&beta_&var._old")) * &var;
%END;
_residual_ = &y_var - _Xb_;
RUN;
/* Step 3: Estimate Kaplan-Meier curve for residuals */
PROC LIFETEST DATA=temp_data_&boot_iter OUTSURV=resid_surv_&boot_iter(KEEP=_residual_ _surv_ _NAME_ _TIME_) NOPRINT;
TIME _residual_ * &censor_var(0);
RUN;
/* Step 4: Impute censored residuals using conditional expectation */
DATA imputed_data_&boot_iter;
MERGE temp_data_&boot_iter resid_surv_&boot_iter;
BY _residual_;
_imputed_y_ = &y_var; /* Default to original value */
RETAIN last_surv;
IF _surv_ NE . THEN last_surv = _surv_;
IF &censor_var=0 THEN DO; /* Impute only for censored observations */
_imputed_y_ = _Xb_ + (_residual_ + last_surv / ( - &y_var._residual_)); /* Simplified Imputation */
END;
ELSE _imputed_y_ = &y_var;
RUN;
/* Step 5: Re-estimate coefficients and check for convergence */
ODS OUTPUT ParameterEstimates=new_betas(RENAME=(Parameter=varname Estimate=estimate));
PROC GLM DATA=imputed_data_&boot_iter NOPRINT;
%IF "%TRIM(&cat_vars)" NE "" %THEN %DO;
CLASS &cat_vars;
%END;
MODEL _imputed_y_ = &x_vars &cat_vars/solution;
RUN;
QUIT;
/* Turn off the ODS output capture */
ODS OUTPUT CLOSE;
/* Step 6: Check for convergence and update coefficients */
%LOCAL diff;
PROC SQL NOPRINT;
SELECT MAX(ABS(new.estimate - old.estimate)) INTO :diff
FROM new_betas AS new
LEFT JOIN initial_betas AS old
ON new.varname = old.varname;
QUIT;
%IF &diff < &tolerance %THEN %LET has_converged = 1;
/* Update macro variables with new coefficients */
DATA _NULL_;
SET new_betas;
IF varname = '_Intercept_' THEN CALL SYMPUT('beta_intercept_old', put(estimate, 16.10));
ELSE CALL SYMPUT('beta_' || compress(varname) || '_old', put(estimate, 16.10));
RUN;
%END; /* End of inner iterative loop */
/* Store the final coefficients for this bootstrap run */
ODS OUTPUT ParameterEstimates=final_betas(RENAME=(Parameter=varname Estimate=estimate));
PROC GLM DATA=_initial_data_uncer NOPRINT;
%IF "%TRIM(&cat_vars)" NE "" %THEN %DO;
CLASS &cat_vars;
%END;
MODEL _imputed_y_ = &x_vars &cat_vars/solution;
RUN;
QUIT;
/* Turn off the ODS output capture */
ODS OUTPUT CLOSE;
PROC SQL;
INSERT INTO bootstrap_coeffs
SELECT &boot_iter AS boot_iter, varname, estimate
FROM final_betas;
QUIT;
%END; /* End of bootstrap loop */
/* Step 7: Calculate final estimates and standard errors from bootstrap results */
PROC MEANS DATA=bootstrap_coeffs N MEAN STD MAXDEC=4 NOPRINT;
VAR estimate;
BY varname;
OUTPUT OUT=bj_results(DROP=_TYPE_ _FREQ_ RENAME=(MEAN=estimate STD=std_err));
RUN;
/* Step 8: Final model run on original data to get imputed y values and the item store file */
DATA final_imputed_data;
SET &data_in;
_Xb_ = %SYSCALL(resolve('&beta_intercept_old'));
%DO i=1 %TO %SYSFUNC(COUNTW(&x_vars));
%LET var = %SCAN(&x_vars, &i);
_Xb_ = _Xb_ + %SYSCALL(resolve("&&beta_&var._old")) * &var;
%END;
_residual_ = &y_var - _Xb_;
RUN;
PROC LIFETEST DATA=final_imputed_data OUTSURV=final_resid_surv NOPRINT;
TIME _residual_ * &censor_var(0);
RUN;
/* Rerun GLM on the fully imputed dataset to get the final model store */
DATA imputed_original_data;
MERGE final_imputed_data final_resid_surv;
BY _residual_;
RETAIN last_surv;
IF _surv_ NE . THEN last_surv = _surv_;
IF &censor_var=0 THEN DO;
_imputed_y_ = _Xb_ + (_residual_ + last_surv / ( - &y_var._residual_));
END;
ELSE _imputed_y_ = &y_var;
RUN;
PROC GLM DATA=imputed_original_data NOPRINT;
MODEL _imputed_y_ = &x_vars &cat_vars;
%IF "%TRIM(&cat_vars)" NE "" %THEN %DO;
CLASS &cat_vars;
%END;
STORE bj_model_store;
RUN;
/* Step 9: Make predictions on a new dataset using PROC PLM */
PROC PLM RESTORE=bj_model_store;
SCORE DATA=&new_data OUT=scored_data PRED;
RUN;
/* Step 10: Compute predicted survival times by adding the median residual to the linear predictor */
PROC UNIVARIATE DATA=final_resid_surv NOPRINT;
VAR _residual_;
OUTPUT OUT=median_resid_out(KEEP=median_residual) MEDIAN=median_residual;
RUN;
DATA bj_predictions;
MERGE scored_data(IN=a) median_resid_out(IN=b);
IF a THEN DO;
predicted_time = PRED + median_residual;
OUTPUT;
END;
RUN;
/* Clean up temporary datasets and the model store */
PROC DATASETS LIB=WORK NOLIST;
DELETE _initial_data_uncer boot_sample temp_data_: resid_surv_: imputed_data_: initial_betas
new_betas final_betas bootstrap_coeffs final_imputed_data final_resid_surv
imputed_original_data scored_data median_resid_out;
RUN;
PROC CATALOG CATALOG=WORK.SASMACR DELETE bj_model_store.ITEMSTORE;
RUN;
%MEND BJ_PREDICTION;
and the
Put ODS statements after the PROC STATEMENT and always end interactive procedures with a QUIT statement. https://documentation.sas.com/doc/en/statug/15.2/statug_ods_examples06.htm
Thanks.
If the GLM does not produce any estimates because of the data you gave it then that is normal.
Check the SAS log.
Thanks
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.