I think that this works with SAS/Base (SAS University edition) as does (1) with SAS/OR. I combined the ideas from the SAS community posting (1) and the example code (2) provided by George Zhu, and Fu-Lin Wang at the Edmonton SAS User Group Meeting May 8, 2013.
(1) https://communities.sas.com/t5/SAS-Programming/Matching-cases-with-controls/td-p/252204
(2) https://www.sas.com/content/dam/SAS/en_ca/User%20Group%20Presentations/Edmonton-User-Group/GeorgeZhu....
The dataset wk03 at the end has cases and controls selected.
%let ratio=2;
data cases;
input id age edu race parity wt ht;
datalines;
05 32 9 1 0 90 5.2
12 35 12 1 2 110 5.0
17 40 10 2 1 130 4.9
22 25 14 3 0 110 5.4
29 27 12 2 1 95 5.0
40 28 10 3 2 130 6
;
data controls;
input id age edu race parity wt ht;
datalines;
10 33 11 2 1 98 5.7
11 35 10 2 2 120 5.9
19 42 11 1 3 139 5.9
26 26 14 0 1 99 5.0
31 27 10 2 1 95 5.5
32 29 12 2 2 125 6.2
37 31 10 2 1 190 7.2
43 35 12 1 1 110 5.0
46 45 10 1 1 130 4.9
51 28 12 3 0 110 5.4
55 22 10 1 1 95 5.0
58 28 10 3 2 130 6
70 32 9 1 0 90 5.2
79 37 11 1 2 110 5.0
85 48 14 2 1 130 4.9
88 29 14 3 0 110 5.4
90 25 14 2 1 95 5.0
93 27 11 3 2 130 6
99 35 10 1 0 90 5.2
101 33 11 1 2 110 5.0
114 38 16 2 1 130 4.9
125 25 13 3 0 110 5.4
129 25 10 2 1 95 5.0
130 24 11 3 2 130 6
;
/* *Find all possible matches */
/* proc sql; */
/* create table matches as */
/* select */
/* s.id as sampleId, */
/* c.id as controlId, */
/* euclid((c.age-s.age)/7, (c.edu-s.edu)/5) as distance format=5.2 */
/* from */
/* sample as s left join */
/* controls as c */
/* on c.age between s.age-3 and s.age+3 and */
/* c.edu between s.edu-2 and s.edu+2 and */
/* c.race = s.race and */
/* (c.parity=0) = (s.parity=0) */
/* order by sampleId, distance; */
/* select * from matches; */
/* quit; */
/* euclid((control_age-case_age)/7, (control_edu-case_edu)/5) */
data controls_S;
set controls;
call streaminit(30259);
rand_num=rand('UNIFORM');
*for scramble the order of controls;
run;
data cases_S;
set cases;
call streaminit(40259);
rand_num=rand('UNIFORM');
*for scramble the order of cases;
run;
proc sql;
create table all_matches_S as select
a.age as case_age ,
a.edu as case_edu ,
a.race as case_race ,
a.parity as case_parity ,
a.wt as case_wt ,
a.ht as case_ht ,
a.id as case_id ,
b.age as control_age ,
b.edu as control_edu ,
b.race as control_race ,
b.parity as control_parity ,
b.wt as control_wt ,
b.ht as control_ht ,
b.id as control_id ,
a.rand_num as case_rand,
b.rand_num as control_rand ,
euclid((control_age-case_age)/7, (control_edu-case_edu)/5) as distance
from cases_S a, controls_S b
where
abs(case_age - control_age) <= 3
and abs(case_edu - control_edu) <= 2
and case_race = control_race
and (case_parity=0) = (control_parity=0)
;
*matching condition;
quit; run;
data all_matches_S;
set all_matches_S;
call streaminit(90289);
match_rand=rand('UNIFORM');
*for scrambling the order cases;
run;
proc sort data=all_matches_S;
by distance control_rand match_rand;
run;
data all_matches_S;
set all_matches_S;
by control_rand notsorted;
if first.control_rand then output;
run;
proc sort data=all_matches_S;
by case_rand distance;
run;
data all_matches_S(drop=count);
set all_matches_S;
retain count;
by case_rand;
if first.case_rand then count=0;
count=count+1;
if count<=&ratio. then output;
run;
proc sql;
create table wk01 as
select
distinct
1 as case,
t2.case_id,
t1.*
from
cases as t1 ,
All_matches_s as t2
where
t1.id=t2.case_id
order by t2.case_id
;
quit; run;
proc sql;
create table wk02 as
select
2 as case,
t2.case_id,
t2.control_id, t2.distance label = "attn distance just to check distance" ,
t1.*
from
controls as t1 ,
All_matches_s as t2
where
t1.id=t2.control_id
order by t2.case_id, t2.control_id
;
quit; run;
data wk03;
set wk01 wk02;
run;
proc sort data=wk03;
by case_id case id;
run;
proc print data=wk03 noobs; var case case_id id; run;
What is your question?
I am not an expert on case/control matching, but I believe that many SAS programmers now use the PSMATCH procedure for that purpose. See if it will perform the calculations that you need. The PSMATCH procedure was introduced in SAS/STAT 14.2.
Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.