Hi! I am running exact matching using a greedy algorithm. I ran a macro that I found online and got the error "insufficient memory to data step program. The SAS system stopped processing this step because of insufficient memory." I doubled my SAS memory to 4 GB, but the error persisted. I came across a post describing a similar issue at this link: https://communities.sas.com/t5/SAS-Programming/Hash-object-with-Insufficient-memory-to-execute-data-step/td-p/440515 As a SAS beginner, I have not been able to modify my code effectively to reduce the memory demands of the hash object. Any suggestions or coding help would be highly appreciated! Here is the code:
%MACRO MATCH(CASE=,CONTROL=,IDCA=,IDCO=,MVARS=,WTS=,DMAXK=,DMAX=,
NCONTLS=1, TIME=,
METHOD=,SEEDCA=,SEEDCO=,MAXITER=100000,PRINT=y,
OUT=__OUT,OUTNMCA=__NMCA,OUTNMCO=__NMCO,MINCONT=,MAXCONT=);
/* ---------------------------------------------------------------------
   Parameter validation and parsing.
   BAD is set to 1 whenever a required parameter is missing or invalid.
   The blank-delimited lists MVARS, WTS and DMAXK are split into the macro
   variables V1..Vn, W1..Wn and K1..Kn, and their lengths are stored in
   NVAR, NWTS and NK. The nested macros defined further down read these.
   All working variables are declared local so that a global macro
   variable with the same name in the calling session is never changed.
   --------------------------------------------------------------------- */
%LOCAL BAD NVAR NWTS NK I;
%LET BAD=0;

/* Required parameters. */
%IF %LENGTH(&CASE)=0 %THEN %DO;
    %PUT ERROR: NO CASE DATASET SUPPLIED;
    %LET BAD=1;
%END;
%IF %LENGTH(&CONTROL)=0 %THEN %DO;
    %PUT ERROR: NO CONTROL DATASET SUPPLIED;
    %LET BAD=1;
%END;
%IF %LENGTH(&IDCA)=0 %THEN %DO;
    %PUT ERROR: NO IDCA VARIABLE SUPPLIED;
    %LET BAD=1;
%END;
%IF %LENGTH(&IDCO)=0 %THEN %DO;
    %PUT ERROR: NO IDCO VARIABLE SUPPLIED;
    %LET BAD=1;
%END;
%IF %LENGTH(&MVARS)=0 %THEN %DO;
    %PUT ERROR: NO MATCHING VARIABLES SUPPLIED;
    %LET BAD=1;
%END;
%IF %LENGTH(&WTS)=0 %THEN %DO;
    %PUT ERROR: NO WEIGHTS SUPPLIED;
    %LET BAD=1;
%END;

/* The greedy method needs a random seed for cases and for controls. */
%IF %UPCASE(&METHOD)=GREEDY %THEN %DO;
    %IF %LENGTH(&SEEDCA)=0 %THEN %DO;
        %PUT ERROR: NO SEEDCA VALUE SUPPLIED;
        %LET BAD=1;
    %END;
    %IF %LENGTH(&SEEDCO)=0 %THEN %DO;
        %PUT ERROR: NO SEEDCO VALUE SUPPLIED;
        %LET BAD=1;
    %END;
%END;
%IF %LENGTH(&OUT)=0 %THEN %DO;
    %PUT ERROR: NO OUTPUT DATASET SUPPLIED;
    %LET BAD=1;
%END;
%IF %UPCASE(&METHOD)^=GREEDY & %UPCASE(&METHOD)^=OPTIMAL %THEN %DO;
    %PUT ERROR: METHOD MUST BE GREEDY OR OPTIMAL;
    %LET BAD=1;
%END;
%IF (&MINCONT= AND &MAXCONT^= ) OR (&MINCONT^= AND &MAXCONT= )
%THEN %DO;
    %PUT ERROR: MINCONT AND MAXCONT MUST BOTH BE SPECIFIED;
    %LET BAD=1;
%END;

/* Count the entries of MVARS and WTS. An UNTIL loop always runs once,
   so each count is guarded: an empty list now counts as 0 instead of 1
   and no longer leads to an invalid generated DATA step below. */
%LET NVAR=0;
%IF %LENGTH(&MVARS)>0 %THEN %DO %UNTIL(%SCAN(&MVARS,&NVAR+1,' ')= );
    %LET NVAR=%EVAL(&NVAR+1);
%END;
%LET NWTS=0;
%IF %LENGTH(&WTS)>0 %THEN %DO %UNTIL(%QSCAN(&WTS,&NWTS+1,' ')= );
    %LET NWTS=%EVAL(&NWTS+1);
%END;
%IF &NVAR^= &NWTS %THEN %DO;
    %PUT ERROR: #VARS MUST EQUAL #WTS;
    %LET BAD=1;
%END;

/* Count the per-variable calipers; never more calipers than variables. */
%LET NK=0;
%IF %QUOTE(&DMAXK)^= %THEN %DO %UNTIL(%QSCAN(&DMAXK,&NK+1,' ')= );
    %LET NK=%EVAL(&NK+1);
%END;
%IF &NK>&NVAR %THEN %LET NK=&NVAR;

/* V1..Vn hold the names of the matching variables. */
%DO I=1 %TO &NVAR;
    %LOCAL V&I;
    %LET V&I=%SCAN(&MVARS,&I,' ');
%END;

/* W1..Wn hold the weights; a generated DATA step rejects negative ones. */
%IF &NWTS>0 %THEN %DO;
    DATA _NULL_;
    %DO I=1 %TO &NWTS;
        %LOCAL W&I;
        %LET W&I=%SCAN(&WTS,&I,' ');
        IF &&W&I<0 THEN DO;
            PUT 'ERROR: WEIGHTS MUST BE NON-NEGATIVE';
            CALL SYMPUT('BAD','1');
        END;
    %END;
    RUN;
%END;

/* K1..Kn hold the calipers; a generated DATA step rejects negative ones. */
%IF &NK>0 %THEN %DO;
    DATA _NULL_;
    %DO I=1 %TO &NK;
        %LOCAL K&I;
        %LET K&I=%SCAN(&DMAXK,&I,' ');
        IF &&K&I<0 THEN DO;
            PUT 'ERROR: DMAXK VALUES MUST BE NON-NEGATIVE';
            CALL SYMPUT('BAD','1');
        END;
    %END;
    RUN;
%END;
%MACRO DIJ;
/* Generates the distance expression
       W1*ABS(__CA1-__CO1) + ... + Wn*ABS(__CAn-__COn)
   for the NVAR matching variables, followed by a semicolon, so the call
   can directly complete an assignment statement in a DATA step.
   Reads NVAR and W1..Wn from the enclosing macro. */
%LOCAL I; /* private loop index: the index of the calling macro is left untouched */
%DO I=1 %TO &NVAR-1;
    &&W&I*ABS(__CA&I-__CO&I) +
%END;
&&W&NVAR*ABS(__CA&NVAR-__CO&NVAR);
%MEND DIJ;
%MACRO MAX1;
/* Generates extra terms to append to an IF condition:
     - when DMAX is given, a bound on the total distance __D
     - for each of the first NK matching variables, a bound Ki on the
       absolute case/control difference of that variable.
   Every term starts with the logical-and operator, so the call must
   follow an existing condition. No THEN and no semicolon is generated.
   Reads DMAX, NK and K1..Kn from the enclosing macro. */
%LOCAL I; /* private loop index: the index of the calling macro is left untouched */
%IF &DMAX^= %THEN %DO;
    & __D<=&DMAX
%END;
%DO I=1 %TO &NK;
    & ABS(__CA&I-__CO&I)<=&&K&I
%END;
%MEND MAX1;
%MACRO MAX2;
/* Generates one complete OUTPUT statement, conditional on whichever
   restrictions were requested. Exactly one of the four branches below
   fires, selected by whether DMAX is given and whether NK is positive:
     DMAX blank, NK=0 : plain OUTPUT
     DMAX given, NK=0 : bound on _COST_
     DMAX blank, NK>0 : bounds K1..Kn on the per-variable differences
     DMAX given, NK>0 : both of the above
   When TIME is given, each branch also requires __cotime > __catime.
   Not called anywhere in the code visible here.
   Reads DMAX, NK, TIME and K1..Kn from the enclosing macro. */
%LOCAL I; /* private loop index: the index of the calling macro is left untouched */

/* No distance restriction at all. */
%IF &DMAX= & &NK=0 %THEN %DO;
    %IF &time^= %then %do;
        if __cotime>__catime then
    %end;
    output;
%end;

/* Total-distance restriction only. */
%IF &DMAX^= & &NK=0 %THEN %DO;
    IF _COST_<=&DMAX
    %if &time^= %then %do;
        & __cotime>__catime
    %end;
    THEN OUTPUT;
%END;

/* Per-variable restrictions only; the first term carries no operator. */
%IF &DMAX= & &NK>0 %THEN %DO;
    IF ABS(__CA1-__CO1)<=&K1
    %DO I=2 %TO &NK;
        & ABS(__CA&I-__CO&I)<=&&K&I
    %END;
    %if &time^= %then %do;
        & __cotime>__catime
    %end;
    THEN OUTPUT;
%END;

/* Total-distance and per-variable restrictions together. */
%IF &DMAX^= & &NK>0 %THEN %DO;
    IF _COST_<=&DMAX
    %DO I=1 %TO &NK;
        & ABS(__CA&I-__CO&I)<=&&K&I
    %END;
    %if &time^= %then %do;
        & __cotime>__catime
    %end;
    THEN OUTPUT;
%END;
%MEND MAX2;
%MACRO LBLS;
/* Generates the label assignments for the per-variable output columns
   __CAi, __COi, __DIFi and __WTi, using the name of matching variable i
   as the first part of each label. The slash inside each label is the
   split character used by the PROC PRINT report.
   No LABEL keyword and no semicolon is generated: the call must sit
   inside a LABEL statement that the caller terminates.
   Reads NVAR and V1..Vn from the enclosing macro. */
%LOCAL I; /* private loop index: the index of the calling macro is left untouched */
%DO I=1 %TO &NVAR;
    __CA&I="&&V&I/CASE"
    __CO&I="&&V&I/CONTROL"
    __DIF&I="&&V&I/ABS. DIFF "
    __WT&I="&&V&I/WEIGHT"
%END;
%MEND LBLS;
%MACRO VBLES;
/* Generates a variable list for reporting: first all absolute
   differences __DIF1..__DIFn, then the case/control value pairs
   __CA1 __CO1 ... __CAn __COn. No semicolon is generated.
   Reads NVAR from the enclosing macro. */
%LOCAL I; /* private loop index: the index of the calling macro is left untouched */
%DO I=1 %TO &NVAR;
    __DIF&I
%END;
%DO I=1 %TO &NVAR;
    __CA&I __CO&I
%END;
%MEND VBLES;
%MACRO GREEDY;
/* Greedy matching of controls to cases.

   Steps:
     1. Copy CASE and CONTROL to __CASE and __CONT, dropping observations
        with a missing matching variable (or missing TIME when given),
        and put both into a random order driven by SEEDCA and SEEDCO.
     2. For each of NCONTLS passes and for each case, scan every control
        not yet used and keep the one with the smallest distance __D that
        also satisfies the restrictions generated by MAX1 (and the TIME
        ordering when given). The chosen control is flagged as used.
     3. Write the matched pairs, with labels, differences and weights,
        to the data set named by OUT.

   Cost: step 2 reads up to NCONTLS x NCA x NCO control observations by
   direct access, and holds one 1-byte flag per control in memory.

   Sets the global macro variable BAD2 to 1 when matching cannot be
   attempted and to 0 otherwise.
   Reads NVAR, V1..Vn, W1..Wn and the MATCH parameters from the
   enclosing macro. */
%GLOBAL BAD2;
%LOCAL I MISSTEST NCA NCO;
/* Start the counts at 0: the DATA steps below only store a count when at
   least one observation survives, so an empty input would otherwise
   leave the count undefined. */
%LET NCA=0;
%LET NCO=0;

/* ---- Cases ---- */
DATA __CASE; SET &CASE;
%DO I=1 %TO &NVAR;
    %LET MISSTEST=%SCAN(&MVARS,&I,' ');
    IF &MISSTEST=. THEN DELETE;
%END;
%IF &TIME^= %THEN %DO;
    IF &TIME=. THEN DELETE;
%END;
RUN;

/* Second pass so that END= and the automatic counter refer to the
   filtered data: standardise the names and attach a random sort key. */
DATA __CASE; SET __CASE END=EOF;
KEEP __IDCA __CA1-__CA&NVAR __R &mvars
%if &time^= %then %do;
    __catime
%end;
;
__IDCA=&IDCA;
%if &time^= %then %do;
    __catime=&time;
%end;
%DO I=1 %TO &NVAR;
    __CA&I=&&V&I;
%END;
SEED=&SEEDCA;
__R=RANUNI( SEED );
IF EOF THEN CALL SYMPUTX('NCA',_N_);
RUN;
PROC SORT DATA=__CASE; BY __R __IDCA;
RUN;

/* ---- Controls ---- */
DATA __CONT; SET &CONTROL;
%DO I=1 %TO &NVAR;
    %LET MISSTEST=%SCAN(&MVARS,&I,' ');
    IF &MISSTEST=. THEN DELETE;
%END;
%IF &TIME^= %THEN %DO;
    IF &TIME=. THEN DELETE;
%END;
RUN;
DATA __CONT; SET __CONT END=EOF;
KEEP __IDCO __CO1-__CO&NVAR __R &mvars
%if &time^= %then %do;
    __cotime
%end;
;
__IDCO=&IDCO;
%if &time^= %then %do;
    __cotime=&time;
%end;
%DO I=1 %TO &NVAR;
    __CO&I=&&V&I;
%END;
SEED=&SEEDCO;
__R=RANUNI( SEED );
IF EOF THEN CALL SYMPUTX('NCO',_N_);
RUN;

/* ---- Feasibility checks (the steps above must have run by now) ---- */
%LET BAD2=0;
%IF &NCA=0 %THEN %DO;
    %PUT ERROR: NO CASES LEFT AFTER REMOVING MISSING VALUES;
    %LET BAD2=1;
%END;
%ELSE %IF &NCO < %EVAL(&NCA*&NCONTLS) %THEN %DO;
    %PUT ERROR: NOT ENOUGH CONTROLS TO MAKE REQUESTED MATCHES;
    %LET BAD2=1;
%END;

%IF &BAD2=0 %THEN %DO;
    PROC SORT DATA=__CONT; BY __R __IDCO;
    RUN;

    /* ---- Greedy search ---- */
    DATA __MATCH;
    KEEP __IDCA __CA1-__CA&NVAR __DIJ __MATCH __CONT_N
    %if &time^= %then %do;
        __catime __cotime
    %end;
    ;
    /* One flag per control: 0 = still available, 1 = already matched. */
    ARRAY __USED(&NCO) $ 1 _TEMPORARY_;
    DO __I=1 TO &NCO;
        __USED(__I)='0';
    END;
    /* Pass __I assigns control number __I to every case that can get one. */
    DO __I=1 TO &NCONTLS;
        DO __J=1 TO &NCA;
            SET __CASE POINT=__J;
            __SMALL=.;
            __MATCH=.;
            DO __K=1 TO &NCO;
                IF __USED(__K)='0' THEN DO;
                    SET __CONT POINT=__K;
                    __D=%DIJ
                    IF __d^=. & (__SMALL=. | __D<__SMALL) %MAX1
                    %if &time^= %then %do;
                        & __cotime > __catime
                    %end;
                    THEN DO;
                        __SMALL=__D;
                        __MATCH=__K;
                        __DIJ=__D;
                        __CONT_N=__I;
                    END;
                END;
            END;
            IF __MATCH^=. THEN DO;
                __USED(__MATCH)='1';
                OUTPUT;
            END;
        END;
    END;
    /* Direct access never reaches end of file, so stop explicitly. */
    STOP;
    RUN;

    /* ---- Attach the chosen control to each matched case ---- */
    DATA &OUT;
    SET __MATCH;
    SET __CONT POINT=__MATCH;
    KEEP __IDCA __IDCO __CONT_N __DIJ __CA1-__CA&NVAR
    __CO1-__CO&NVAR __DIF1-__DIF&NVAR __WT1-__WT&NVAR
    %if &time^= %then %do;
        __catime __cotime
    %end;
    ;
    LABEL __IDCA="&IDCA/CASE"
    __IDCO="&IDCO/CONTROL"
    %if &time^= %then %do;
        __catime="&time/CASE"
        __cotime="&time/CONTROL"
    %end;
    __CONT_N='CONTROL/NUMBER'
    __DIJ='DISTANCE/D_IJ'
    %LBLS;
    %DO I=1 %TO &NVAR;
        __DIF&I=abs(__CA&I-__CO&I);
        __WT&I=&&W&I;
    %END;
    RUN;
%END;
%MEND GREEDY;
/* ---------------------------------------------------------------------
   Driver: when validation passed, run the requested matching method,
   then build the non-matched data sets and, when PRINT is Y, the
   listing and summary reports. Title 9 and all footnotes are cleared at
   the end whether or not anything was run.
   --------------------------------------------------------------------- */
%IF &BAD=0 %THEN %DO;

    /* Dispatch on METHOD; validation allows only GREEDY or OPTIMAL. */
    %IF %UPCASE(&METHOD)^=GREEDY %THEN %DO;
        %OPTIMAL
    %END;
    %ELSE %DO;
        %GREEDY
    %END;

    %IF &BAD2=0 %THEN %DO;

        /* Cases that did not receive a first control. */
        PROC SORT DATA=&OUT;
            BY __IDCA __CONT_N;
        RUN;
        PROC SORT DATA=__case;
            BY __IDCA;
        RUN;
        DATA &outnmca;
            MERGE __case
                  &out(in=__inout where=(__cont_n=1));
            BY __idca;
            IF NOT __inout;
        RUN;

        /* Controls that were never used. */
        PROC SORT DATA=__cont;
            BY __IDCO;
        RUN;
        PROC SORT DATA=&out;
            BY __IDCO;
        RUN;
        DATA &outnmco;
            MERGE __cont
                  &out(in=__inout);
            BY __idco;
            IF NOT __inout;
        RUN;

        /* Leave the matched data set ordered by case id. */
        PROC SORT DATA=&out;
            BY __IDCA;
        RUN;

        %IF %UPCASE(&print)=Y %THEN %DO;

            /* Listing of every matched pair. */
            PROC PRINT DATA=&out LABEL SPLIT='/';
                VAR __IDCA __IDCO __CONT_N
                %IF &time^= %THEN %DO;
                    __catime __cotime
                %END;
                __DIJ %VBLES;
                SUM __dij;
                TITLE9 'Data listing for matched cases and controls';
                FOOTNOTE
                "match macro: case=&case control=&control idca=&idca idco=&idco";
                FOOTNOTE2
                " mvars=&mvars wts=&wts dmaxk=&dmaxk dmax=&dmax ncontls=&ncontls";
                %IF &time^= %THEN %DO;
                    FOOTNOTE3 "time=&time method=&method seedca=&seedca seedco=&seedco";
                %END;
                %ELSE %DO;
                    FOOTNOTE3 " method=&method seedca=&seedca seedco=&seedco";
                %END;
                FOOTNOTE4 " out=&out outnmca=&outnmca outnmco=&outnmco";
            RUN;

            /* Summary of the matched pairs by control number. A numbered
               range with equal end points is a valid one-variable list,
               so one VAR statement serves any number of variables. */
            TITLE9 'Summary data for matched cases and controls';
            PROC MEANS DATA=&out N MEAN SUM MIN MAX;
                CLASS __cont_n;
                VAR __dij __dif1-__dif&nvar __ca1-__ca&nvar
                %IF &time^= %THEN %DO;
                    __catime
                %END;
                __co1-__co&nvar
                %IF &time^= %THEN %DO;
                    __cotime
                %END;
                ;
            RUN;

            /* Summaries of what could not be matched. */
            PROC MEANS DATA=&outnmca N MEAN SUM MIN MAX;
                VAR &mvars;
                TITLE9 'Summary data for NON-matched cases';
            RUN;
            PROC MEANS DATA=&outnmco N MEAN SUM MIN MAX;
                VAR &mvars;
                TITLE9 'Summary data for NON-matched controls';
            RUN;
        %END;
    %END;
%END;

/* Reset the title and footnotes used by the reports. */
TITLE9;
FOOTNOTE;
RUN;
%MEND MATCH;
/* Greedy matching on AGE with up to 3 controls per case. With dmaxk=0
   the absolute AGE difference of a matched pair must be 0. MAXITER is
   accepted by the macro but is not referenced by the greedy code. */
%match(
    case=match.trans20_ccaea_case,
    control=match.ccae20_controls02,
    idca=MATCHID,
    idco=ENROLID,
    ncontls=3,
    mvars=AGE,
    maxiter=1,
    wts=1,
    dmaxk=0,
    out=match,
    method=greedy,
    seedca=2,
    seedco=2
);
... View more