Hello! Sorry I am new to the SAS community and did not know you could do that. Here are my code and log. I was hoping there would be an easier way to get my desired results utilizing PROC SQL. Thank you for your response %macro dist(data=,id=,group=,mvars=,wts=,dmax=,dmaxk=,
time=,transf=0,dist=1,out=_dist,printd=n,
vmatch=n,a=,b=,lilm=,outm=_match,printm=n,summatch=n);
%global firstco lastco nvar _match; *10.22.03--added _match;
%let bad=0;
%IF %LENGTH(&MVARS)=0 %THEN %DO;
%PUT 'ERROR: NO MATCHING VARIABLES SUPPLIED';
%LET BAD=1;
%END;
%IF %LENGTH(&WTS)=0 %THEN %DO;
%PUT 'ERROR: NO WEIGHTS SUPPLIED';
%LET BAD=1;
%END;
%LET NVAR=0;
%DO %UNTIL(%SCAN(&MVARS,&NVAR+1,' ')= );
%LET NVAR=%EVAL(&NVAR+1);
%END;
%LET NWTS=0;
%DO %UNTIL(%SCAN(&WTS,&NWTS+1,' ')= );
%LET NWTS=%EVAL(&NWTS+1);
%END;
%IF &NVAR^= &NWTS %THEN %DO;
%PUT 'ERROR: #VARS MUST EQUAL #WTS';
%LET BAD=1;
%END;
%LET NK=0;
%IF %QUOTE(&DMAXK)^= %THEN %DO %UNTIL(%QSCAN(&DMAXK,&NK+1,' ')= );
%LET NK=%EVAL(&NK+1);
%END;
%IF &NK>&NVAR %THEN %LET NK=&NVAR;
%DO I=1 %TO &NVAR;
%LET V&I=%SCAN(&MVARS,&I,' ');
%END;
%IF &NWTS>0 %THEN %DO;
DATA _NULL_;
%DO I=1 %TO &NWTS;
%LET W&I=%SCAN(&WTS,&I,' ');
IF &&W&I<0 THEN DO;
PUT 'ERROR: WEIGHTS MUST BE NON-NEGATIVE';
CALL SYMPUT('BAD','1');
END;
%END;
RUN;
%END;
%IF &NK>0 %THEN %DO;
DATA _NULL_;
%DO I=1 %TO &NK;
%LET K&I=%SCAN(&DMAXK,&I,' ');
IF &&K&I<0 THEN DO;
PUT 'ERROR: DMAXK VALUES MUST BE NON-NEGATIVE';
CALL SYMPUT('BAD','1');
END;
%END;
RUN;
%END;
%IF %upcase(&vmatch)=Y
and (&a= or &b= or &lilm= ) %then %do;
%PUT 'ERROR: VMATCH option, but missing a, b, or lilm';
%LET BAD=1;
%END;
%if &bad=0 %then %do;
*** delete obs with missing mvar or id var from input dataset;
data _check;
set &data;
dum=.;
__id=&id;
if __id="" then delete;
%do z=1 %to &nvar;
if %scan(&mvars,&z)=. then delete;
%end;
%if &time^= %then %do;
if &time lt 0 then delete;
%end;
keep __id &id &group dum &mvars &time;
run;
proc sort data=_check; by &group __id; **was &id;
*** transform data if requested/separate cases & controls;
%if &transf=1 %then %do;
proc standard data=_check m=0 s=1 out=_stdzd; var &mvars;
data _caco;
set _stdzd;
%end;
%if &transf=2 %then %do;
proc rank data=_check out=_ranks; var &mvars;
data _caco;
set _ranks;
%end;
%if &transf=0 %then %do;
data _caco;
set _check;
%end;
data _case; set _caco;
if &group=1;
rename
%do k=1 %to &nvar;
%scan(&mvars,&k)=__ca&k
%end;
;
%if &time^= %then %do;
__tca=&time;
drop &time;
%end;
%else %do;
__tca=.;
%end;
run;
data _cont; set _caco;
if &group=0;
__contid="_C" || left(put( _n_,best10.)); *assign dummy names to controls;
__labl="_L" || left(put( _n_,best10.)); *assign label names to controls;
call symput(__labl,__id); *assign mvars _L1,_L2, etc to original var names;
rename
%do k=1 %to &nvar;
%scan(&mvars,&k)=__co&k
%end;
;
%if &time^= %then %do;
__tco=&time;
drop &time;
%end;
%else %do;
__tco=.;
%end;
run;
%put &_L1 &_L2 &_L3 &_L4 ;
* proc print data=_cont;
%nobs(dsn=_case,macvar=ncase);
%nobs(dsn=_cont,macvar=ncont);
*run;
**compute distance matrix;
data &out;
keep &id _C1-_C&ncont;
do i=1 to &ncase; **no. cases;
set _case point=i;
array dij[*] _C1-_C&ncont;
do j=1 to &ncont; **no. controls;
set _cont(drop=&id) point=j;
%if &dist=2 %then %do;
**wtd euclidian dist;
dij[j]= sqrt(
%do k=1 %to &nvar;
%scan(&wts,&k)*(__ca&k - __co&k)**2
%if &k<&nvar %then + ;
%end;
);
%end;
%else %do;
**wtd sum absolute diff;
dij[j]=
%do k=1 %to &nvar;
%scan(&wts,&k)*abs(__ca&k - __co&k )
%if &k<&nvar %then + ;
%end;
;
%end;
**total distance caliper;
%if &dmax ^= %then %do;
if dij[j]>&dmax then dij[j]=.; **distance too large;
%end;
** case-control delta not within calipers for var k**;
%if %quote(&dmaxk)^= %then %do;
%do k=1 %to &nvar;
if abs(__ca&k - __co&k)>%scan(&dmaxk,&k,' ') then dij[j]=.;
%end;
%end;
%if &time ^= %then %do;
if __tco le __tca then dij[j]=.; **control j not in risk set for
case i;
%end;
end; ** end of loop for controls;
output;
end; ** end of loop for cases;
stop;
%end;
run;
%if %upcase(&printd)=Y %then %do;
proc print label data=&out;
label
%do zz=1 %to &ncont; _C&zz=&&_L&zz %end;
;
%end;
footnote"Dist macro: data=&data id=&id group=&group mvars=&mvars wts=&wts";
footnote2"dmaxk=&dmaxk dmax=&dmax time=&time transf=&transf dist=&dist out=&out";
run;
%if %upcase(&vmatch)=Y %then %do;
%vmatch(dist=&out,idca=&id,a=&a,b=&b,lilm=&lilm,n=&ncase,
firstco=_C1,lastco=_C&ncont,
print=&printm,fixprt=Y,out=&outm);
%if %upcase(&summatch)=Y %then %do;
data goodca; merge &outm(in=inm) _case(rename=(__id=idca)); by idca;
if inm;
proc sort data=goodca; by idca idco;
* proc print;
proc sort data=&outm; by idco;
proc sort data=_cont; by __id;
data goodco; merge &outm(in=inm) _cont(rename=(__id=idco)); by idco;
if inm;
proc sort data=goodco; by idca idco;
* proc print;
data __goodm; merge goodco goodca; by idca idco;
keep idca idco __dij __dtime __tca __tco
%do i=1 %to &nvar;
__ca&i __co&i __d&i __absd&i
%end;
;
__dij=dij;
%do i=1 %to &nvar;
__d&i=(__ca&i - __co&i); **raw diff;
__absd&i=abs(__ca&i - __co&i); **abs diff;
%end;
__dtime=__tco-__tca;
label
idca="&id/CASE"
idco="&id/CONTROL"
__dij="Weighted/Difference"
__dtime="&time/TIME DIFF"
__tca="&time/CASE TIME"
__tco="&time/CONT TIME"
%do i=1 %to &nvar; %let vvar=%scan(&mvars,&i);
__absd&i="&vvar/ABS. DIFF"
__ca&i="&vvar/CASE"
__co&i="&vvar/CONTROL"
%end;
;
title9"Listing of Matched Cases and Controls";
footnote"Dist macro: data=&data id=&id group=&group mvars=&mvars wts=&wts";
footnote2"dmaxk=&dmaxk dmax=&dmax time=&time transf=&transf dist=&dist out=&out";
footnote3" Variable Optimal Matching option: a=&a b=&b m=&lilm N=&Ncase";
proc print data=__goodm label split='/'; var idca idco __dij
%do i=1 %to &nvar;
__absd&i
%end;
%do i=1 %to &nvar;
__ca&i __co&i
%end;
__dtime __tca __tco; sum __dij;
run;
title9"Summary statistics--case vs control, one obs per matched control";
%if &sysver ge 8 %then %do;
proc means data=__goodm maxdec=3 fw=8
n mean median min p10 p25 p75 p90 max sum;
%end;
%else %do;
proc means data=__goodm maxdec=3
n mean min max sum;
%end;
var __dij
%do i=1 %to &nvar;
__absd&i
%end;
%do i=1 %to &nvar;
__ca&i __co&i
%end;
__dtime __tca __tco;
*** estimate matching var means within matched sets for controls;
proc means data=__goodm n mean noprint; by idca;
var __dij
%do i=1 %to &nvar;
__co&i
%end;
__tco;
output out=mcont n=n_co mean=__dijm
%do i=1 %to &nvar;
__com&i
%end;
__tcom;
data onecase; set __goodm; by idca; if first.idca;
data __camcon; merge onecase mcont; by idca;
keep idca n_co __dijm __dtime __tca __tcom
%do i=1 %to &nvar;
__ca&i __com&i __actd&i __absd&i
%end;
;
%do i=1 %to &nvar;
__absd&i=abs(__ca&i - __com&i);
__actd&i=(__ca&i - __com&i);
%end;
__dtime=__tcom-__tca;
label
n_co="No./CONTROLS"
__dijm="Average/Dij"
__dtime="&time/Mean Time DIFF"
__tcom="&time/Mean CONT TIME"
%do i=1 %to &nvar; %let vvar=%scan(&mvars,&i);
__absd&i="&vvar/Mean ABS. DIFF"
__com&i="&vvar/Mean CONTROL"
%end;
;
/*
proc print data=__camcon label split='/'; var idca n_co
%do i=1 %to &nvar;
__absd&i
%end;
__dtime
%do i=1 %to &nvar;
__ca&i __com&i
%end;
__tca __tcom; sum __dijm;
*/
title9"Summary statistics--case vs mean of matched controls, one obs per case";
%if &sysver ge 8 %then %do;
proc means data=__camcon maxdec=3 fw=8
n mean median min p10 p25 p75 p90 max sum;
%end;
%else %do;
proc means data=__camcon maxdec=3
n mean min max sum;
%end;
var n_co __dijm
%do i=1 %to &nvar;
__absd&i
%end;
%do i=1 %to &nvar;
__ca&i __com&i
%end;
__dtime __tca __tcom;
%end; **end of loop for summatch=y;
%end; ** end of loop for vmatch=y;
run;
footnote;
%mend dist;
%dist(data=PS_NN,id=id_t, group=preterm, mvars=ps, wts=1, dmax=, time=nbthdate,
out=ps_nn1,vmatch=Y,a=3,b=4,lilm=1530917, printm=y,summatch=n, outm=ps_nn2);
LOG OUTPUT:
7391 %dist(data=PS_NN,id=id_t, group=preterm, mvars=ps, wts=1, dmax=, time=nbthdate,
7392 out=ps_nn1,vmatch=Y,a=3,b=4,lilm=1530917, printm=y,summatch=n, outm=ps_nn2);
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: Character values have been converted to numeric values at the places given by: (Line):(Column).
9:121
NOTE: There were 1576812 observations read from the data set WORK.PS_NN.
NOTE: The data set WORK._CHECK has 37914 observations and 6 variables.
NOTE: DATA statement used (Total process time):
real time 0.17 seconds
cpu time 0.17 seconds
NOTE: There were 37914 observations read from the data set WORK._CHECK.
NOTE: The data set WORK._CHECK has 37914 observations and 6 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.02 seconds
cpu time 0.01 seconds
NOTE: There were 37914 observations read from the data set WORK._CHECK.
NOTE: The data set WORK._CACO has 37914 observations and 6 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
NOTE: There were 37914 observations read from the data set WORK._CACO.
NOTE: The data set WORK._CASE has 37914 observations and 6 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
NOTE: Numeric values have been converted to character values at the places given by: (Line):(Column).
28:217
NOTE: There were 37914 observations read from the data set WORK._CACO.
NOTE: The data set WORK._CONT has 0 observations and 8 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
WARNING: Apparent symbolic reference _L1 not resolved.
WARNING: Apparent symbolic reference _L2 not resolved.
WARNING: Apparent symbolic reference _L3 not resolved.
WARNING: Apparent symbolic reference _L4 not resolved.
&_L1 &_L2 &_L3 &_L4
WARNING: Apparent invocation of macro NOBS not resolved.
NOTE: Line generated by the invoked macro "DIST".
40 * proc print data=_cont; %nobs(dsn=_case,macvar=ncase);
-
180
ERROR 180-322: Statement is not valid or it is used out of proper order.
WARNING: Apparent invocation of macro NOBS not resolved.
NOTE: Line generated by the invoked macro "DIST".
40 %nobs(dsn=_cont,macvar=ncont);
-
180
ERROR 180-322: Statement is not valid or it is used out of proper order.
22: LINE and COLUMN cannot be determined.
NOTE 242-205: NOSPOOL is on. Rerunning with OPTION SPOOL might allow recovery of the LINE and COLUMN where the error has
occurred.
ERROR 22-322: Syntax error, expecting one of the following: a name, ;, _ALL_, _CHARACTER_, _CHAR_, _NUMERIC_.
200: LINE and COLUMN cannot be determined.
NOTE: NOSPOOL is on. Rerunning with OPTION SPOOL might allow recovery of the LINE and COLUMN where the error has occurred.
ERROR 200-322: The symbol is not recognized and will be ignored.
22: LINE and COLUMN cannot be determined.
NOTE 242-205: NOSPOOL is on. Rerunning with OPTION SPOOL might allow recovery of the LINE and COLUMN where the error has
occurred.
ERROR 22-322: Syntax error, expecting one of the following: a name, a quoted string, a numeric constant, a datetime constant,
a missing value, INPUT, PUT.
WARNING: Apparent symbolic reference NCONT not resolved.
ERROR: Missing numeric suffix on a numbered variable list (_C1-_C).
WARNING: Apparent symbolic reference NCASE not resolved.
22: LINE and COLUMN cannot be determined.
NOTE 242-205: NOSPOOL is on. Rerunning with OPTION SPOOL might allow recovery of the LINE and COLUMN where the error has
occurred.
ERROR 22-322: Syntax error, expecting one of the following: a name, (, ;, _ALL_, _CHARACTER_, _CHAR_, _NUMERIC_.
200: LINE and COLUMN cannot be determined.
NOTE: NOSPOOL is on. Rerunning with OPTION SPOOL might allow recovery of the LINE and COLUMN where the error has occurred.
ERROR 200-322: The symbol is not recognized and will be ignored.
WARNING: Apparent symbolic reference NCONT not resolved.
ERROR: Missing numeric suffix on a numbered variable list (_C1-_C).
NOTE: Line generated by the invoked macro "DIST".
41 to &ncont; **no. controls; set _cont(drop=&id) point=j;
-
22
WARNING: Apparent symbolic reference NCONT not resolved.
ERROR 22-322: Syntax error, expecting one of the following: a name, a quoted string, a numeric constant, a datetime constant,
a missing value, INPUT, PUT.
WARNING: Not all variables in the list _00-_0 were found.
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.PS_NN1 may be incomplete. When this step was stopped there were 0 observations and 2 variables.
WARNING: Data set WORK.PS_NN1 was not replaced because this step was stopped.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.01 seconds
WARNING: Apparent symbolic reference NCASE not resolved.
WARNING: Apparent symbolic reference NCONT not resolved.
NOTE: Line generated by the macro variable "N".
1 &ncase
-
22
WARNING: Apparent symbolic reference NCASE not resolved.
ERROR 22-322: Syntax error, expecting one of the following: a name, a quoted string, a numeric constant, a datetime constant,
a missing value, INPUT, PUT.
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.DIST1 may be incomplete. When this step was stopped there were 0 observations and 8 variables.
WARNING: Data set WORK.DIST1 was not replaced because this step was stopped.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
WARNING: Apparent symbolic reference KMV not resolved.
&kmv
WARNING: Apparent symbolic reference KMV not resolved.
ERROR: A character operand was found in the %EVAL function or %IF condition where a numeric operand is required. The condition
was: &kmv gt 0
ERROR: The macro VMATCH will stop executing.
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.DIST2 may be incomplete. When this step was stopped there were 0 observations and 9 variables.
WARNING: Data set WORK.DIST2 was not replaced because this step was stopped.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
NOTE: Line generated by the invoked macro "DIST".
64 footnote;
-
117
ERROR 117-185: There were 2 unclosed DO blocks.
... View more