BookmarkSubscribeRSS Feed
abuxton1
Calcite | Level 5

I think that this works with SAS/Base (SAS University edition) as does (1) with SAS/OR. I combined the ideas from the SAS community posting (1) and the example code (2) provided by George Zhu, and Fu-Lin Wang at the Edmonton SAS User Group Meeting May 8, 2013.

(1) https://communities.sas.com/t5/SAS-Programming/Matching-cases-with-controls/td-p/252204
(2) https://www.sas.com/content/dam/SAS/en_ca/User%20Group%20Presentations/Edmonton-User-Group/GeorgeZhu....

The dataset wk03 at the end has cases and controls selected.

 

%let ratio=2;

data cases;
input id age edu race parity wt ht;
datalines;
05 32 9 1 0 90 5.2
12 35 12 1 2 110 5.0
17 40 10 2 1 130 4.9
22 25 14 3 0 110 5.4
29 27 12 2 1 95 5.0
40 28 10 3 2 130 6
;
data controls;
input id age edu race parity wt ht;
datalines;
10 33 11 2 1 98 5.7
11 35 10 2 2 120 5.9
19 42 11 1 3 139 5.9
26 26 14 0 1 99 5.0
31 27 10 2 1 95 5.5
32 29 12 2 2 125 6.2
37 31 10 2 1 190 7.2
43 35 12 1 1 110 5.0
46 45 10 1 1 130 4.9
51 28 12 3 0 110 5.4
55 22 10 1 1 95 5.0
58 28 10 3 2 130 6
70 32 9 1 0 90 5.2
79 37 11 1 2 110 5.0
85 48 14 2 1 130 4.9
88 29 14 3 0 110 5.4
90 25 14 2 1 95 5.0
93 27 11 3 2 130 6
99 35 10 1 0 90 5.2
101 33 11 1 2 110 5.0
114 38 16 2 1 130 4.9
125 25 13 3 0 110 5.4
129 25 10 2 1 95 5.0
130 24 11 3 2 130 6
;

 /* *Find all possible matches */
 /* proc sql;                                                           */
 /* create table matches as                                             */
 /* select                                                              */
 /*     s.id as sampleId,                                               */
 /*     c.id as controlId,                                              */
 /*     euclid((c.age-s.age)/7, (c.edu-s.edu)/5) as distance format=5.2 */
 /* from                                                                */
 /*     sample as s left join                                           */
 /*     controls as c                                                   */
 /*     on  c.age between s.age-3 and s.age+3 and                       */
 /*         c.edu between s.edu-2 and s.edu+2 and                       */
 /*         c.race = s.race and                                         */
 /*         (c.parity=0) = (s.parity=0)                                 */
 /* order by sampleId, distance;                                        */
 /* select * from matches;                                              */
 /* quit;                                                               */
 /* euclid((control_age-case_age)/7, (control_edu-case_edu)/5)          */


data controls_S;
 set controls;
 call streaminit(30259);
 rand_num=rand('UNIFORM'); 
 *for scramble the order of controls;
 run;
 data cases_S;
 set cases;
 call streaminit(40259);
 rand_num=rand('UNIFORM'); 
 *for scramble the order of cases;
 run;

proc sql;
 create table all_matches_S as select 
 a.age as case_age ,
 a.edu as case_edu ,
 a.race as case_race ,
 a.parity as case_parity ,
 a.wt as case_wt ,
 a.ht as case_ht ,
 a.id as case_id ,
 b.age as control_age ,
 b.edu as control_edu ,
 b.race as control_race ,
 b.parity as control_parity ,
 b.wt as control_wt ,
 b.ht as control_ht ,
 b.id as control_id ,
 a.rand_num as case_rand,
 b.rand_num as control_rand ,
 euclid((control_age-case_age)/7, (control_edu-case_edu)/5) as distance
 from cases_S a, controls_S b 
 where 
      abs(case_age - control_age) <= 3
  and abs(case_edu - control_edu) <= 2 
  and  case_race = control_race 
  and  (case_parity=0) = (control_parity=0)
 ;
 *matching condition;
 quit; run;

data all_matches_S;
 set all_matches_S;
 call streaminit(90289);
 match_rand=rand('UNIFORM'); 
 *for scrambling the order cases;
 run;

proc sort data=all_matches_S;
 by distance control_rand match_rand;
 run;


data all_matches_S;
 set all_matches_S;
 by control_rand notsorted;
 if first.control_rand then output;
 run;

proc sort data=all_matches_S;
 by case_rand distance;
 run;

data all_matches_S(drop=count); 
set all_matches_S;
 retain count;
 by case_rand;
 if first.case_rand then count=0;
 count=count+1;
 if count<=&ratio. then output;
 run;


proc sql;
create table wk01 as
select
distinct 
1 as case,
t2.case_id,
t1.*
from
cases as t1 ,
All_matches_s as t2
where
t1.id=t2.case_id
order by t2.case_id
;
quit; run;


proc sql;
create table wk02 as
select
2 as case,
t2.case_id,
t2.control_id, t2.distance label = "attn distance just to check distance" ,
t1.*
from
controls as t1 ,
All_matches_s as t2
where
t1.id=t2.control_id
order by t2.case_id, t2.control_id
;
quit; run;

data wk03;
set wk01 wk02;
run;

proc sort data=wk03;
by case_id case id;
run;

proc print data=wk03 noobs; var case case_id id; run;
1 REPLY 1
Rick_SAS
SAS Super FREQ

What is your question?

 

I am not an expert on case/control matching, but I believe that many SAS programmers now use the PSMATCH procedure for that purpose. See if it will perform the calculations that you need. The PSMATCH procedure was introduced in SAS/STAT 14.2.

hackathon24-white-horiz.png

2025 SAS Hackathon: There is still time!

Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!

Register Now

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 1 reply
  • 607 views
  • 0 likes
  • 2 in conversation