I think that this works with SAS/Base (SAS University edition) as does (1) with SAS/OR. I combined the ideas from the SAS community posting (1) and the example code (2) provided by George Zhu, and Fu-Lin Wang at the Edmonton SAS User Group Meeting May 8, 2013.
(1) https://communities.sas.com/t5/SAS-Programming/Matching-cases-with-controls/td-p/252204
(2) https://www.sas.com/content/dam/SAS/en_ca/User%20Group%20Presentations/Edmonton-User-Group/GeorgeZhu....
The dataset wk03 at the end has cases and controls selected.
%let ratio=2;
data cases;
input id age edu race parity wt ht;
datalines;
05 32 9 1 0 90 5.2
12 35 12 1 2 110 5.0
17 40 10 2 1 130 4.9
22 25 14 3 0 110 5.4
29 27 12 2 1 95 5.0
40 28 10 3 2 130 6
;
data controls;
input id age edu race parity wt ht;
datalines;
10 33 11 2 1 98 5.7
11 35 10 2 2 120 5.9
19 42 11 1 3 139 5.9
26 26 14 0 1 99 5.0
31 27 10 2 1 95 5.5
32 29 12 2 2 125 6.2
37 31 10 2 1 190 7.2
43 35 12 1 1 110 5.0
46 45 10 1 1 130 4.9
51 28 12 3 0 110 5.4
55 22 10 1 1 95 5.0
58 28 10 3 2 130 6
70 32 9 1 0 90 5.2
79 37 11 1 2 110 5.0
85 48 14 2 1 130 4.9
88 29 14 3 0 110 5.4
90 25 14 2 1 95 5.0
93 27 11 3 2 130 6
99 35 10 1 0 90 5.2
101 33 11 1 2 110 5.0
114 38 16 2 1 130 4.9
125 25 13 3 0 110 5.4
129 25 10 2 1 95 5.0
130 24 11 3 2 130 6
;
/* *Find all possible matches */
/* proc sql; */
/* create table matches as */
/* select */
/* s.id as sampleId, */
/* c.id as controlId, */
/* euclid((c.age-s.age)/7, (c.edu-s.edu)/5) as distance format=5.2 */
/* from */
/* sample as s left join */
/* controls as c */
/* on c.age between s.age-3 and s.age+3 and */
/* c.edu between s.edu-2 and s.edu+2 and */
/* c.race = s.race and */
/* (c.parity=0) = (s.parity=0) */
/* order by sampleId, distance; */
/* select * from matches; */
/* quit; */
/* euclid((control_age-case_age)/7, (control_edu-case_edu)/5) */
data controls_S;
set controls;
call streaminit(30259);
rand_num=rand('UNIFORM');
*for scramble the order of controls;
run;
data cases_S;
set cases;
call streaminit(40259);
rand_num=rand('UNIFORM');
*for scramble the order of cases;
run;
proc sql;
create table all_matches_S as select
a.age as case_age ,
a.edu as case_edu ,
a.race as case_race ,
a.parity as case_parity ,
a.wt as case_wt ,
a.ht as case_ht ,
a.id as case_id ,
b.age as control_age ,
b.edu as control_edu ,
b.race as control_race ,
b.parity as control_parity ,
b.wt as control_wt ,
b.ht as control_ht ,
b.id as control_id ,
a.rand_num as case_rand,
b.rand_num as control_rand ,
euclid((control_age-case_age)/7, (control_edu-case_edu)/5) as distance
from cases_S a, controls_S b
where
abs(case_age - control_age) <= 3
and abs(case_edu - control_edu) <= 2
and case_race = control_race
and (case_parity=0) = (control_parity=0)
;
*matching condition;
quit; run;
data all_matches_S;
set all_matches_S;
call streaminit(90289);
match_rand=rand('UNIFORM');
*for scrambling the order cases;
run;
proc sort data=all_matches_S;
by distance control_rand match_rand;
run;
data all_matches_S;
set all_matches_S;
by control_rand notsorted;
if first.control_rand then output;
run;
proc sort data=all_matches_S;
by case_rand distance;
run;
data all_matches_S(drop=count);
set all_matches_S;
retain count;
by case_rand;
if first.case_rand then count=0;
count=count+1;
if count<=&ratio. then output;
run;
proc sql;
create table wk01 as
select
distinct
1 as case,
t2.case_id,
t1.*
from
cases as t1 ,
All_matches_s as t2
where
t1.id=t2.case_id
order by t2.case_id
;
quit; run;
proc sql;
create table wk02 as
select
2 as case,
t2.case_id,
t2.control_id, t2.distance label = "attn distance just to check distance" ,
t1.*
from
controls as t1 ,
All_matches_s as t2
where
t1.id=t2.control_id
order by t2.case_id, t2.control_id
;
quit; run;
data wk03;
set wk01 wk02;
run;
proc sort data=wk03;
by case_id case id;
run;
proc print data=wk03 noobs; var case case_id id; run;
What is your question?
I am not an expert on case/control matching, but I believe that many SAS programmers now use the PSMATCH procedure for that purpose. See if it will perform the calculations that you need. The PSMATCH procedure was introduced in SAS/STAT 14.2.
It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.