I need to match 4 controls to one case on dobyr, region and gender. I have some code already, but the output dataset needs to have the case id added and I can't figure out how to include it on the "matches" dataset. Sample Have population_msa: ENROLID casecontrol DOBYR REGION GENDER 277502 1 1927 2 1 288002 1 1928 3 2 288401 0 1927 2 1 1013602 0 1927 2 1 1108301 0 1927 2 1 1304301 0 1927 2 1 1439101 0 1928 3 2 1461601 0 1928 3 2 1542902 0 1928 3 2 1576203 0 1928 3 2 WANT - for "Matches" dataset ENROLID_control DOBYR_control REGION_control GENDER_control ENROLID_case DOBYR_case REGION_case GENDER_case 288401 0 1927 2 1 277502 1 1927 2 1 1013602 0 1927 2 1 277502 1 1927 2 1 1108301 0 1927 2 1 277502 1 1927 2 1 1304301 0 1927 2 1 277502 1 1927 2 1 1439101 0 1928 3 2 288002 1 1928 3 2 1461601 0 1928 3 2 288002 1 1928 3 2 1542902 0 1928 3 2 288002 1 1928 3 2 1576203 0 1928 3 2 288002 1 1928 3 2 %LET agerange = 0;/*The agerange is how much of a difference you will allow in dobyr for your cases and eligible controls. Remember to set agerange according to how dobyr is reported. If dobyr is reported as years, then agerange is as well. */ %LET ratio = 4;/*insert the number of controls you want for each case*/ /* ‘population’ is your data set containing both cases and possible controls. You must create a variable called ‘casecontrol’ which is marked ‘1’ for cases. A variable called ‘enrolid’ is needed to distinguish the observations/participants from each other; it must be unique for everyone. Besides these two variables, you will need to have the matching variables. In this form of the code, we have ‘sex‘ and ‘dobyr’.*/ DATA com.msa_cases com.msa_controls; SET com.population_msa; IF casecontrol = 1 THEN OUTPUT com.msa_cases; ELSE OUTPUT com.msa_controls; RUN; PROC FREQ NOPRINT DATA=com.msa_cases; TABLES dobyr*sex*region/OUT=com.msa_caseout;/*The controls are matched on dobyr and sex, which are also the names of the variables */ RUN; %MACRO sample(v_dobyr, v_sex, v_count, v_region, v_enrolid); /*The following section is a program within the code, here called ‘sample’. The program ends with the %MEND-statement*/ DATA com.msa_qualify1; SET com.msa_controls; WHERE (&v_dobyr-&agerange <= dobyr <= &v_dobyr+&agerange) AND (sex = "&v_sex") AND (region = "&v_region"); case_dobyr = &v_dobyr; case_sex = "&v_sex"; case_region = "&v_region"; case_enrolid = "&v_enrolid"; SEED = RANUNI(0); PROC SORT; BY SEED; DATA com.msa_qualify2; SET com.msa_qualify1 NOBS=totobs; IF _N_ <= &v_count*∶ IF &v_count*&ratio <= totobs THEN tag = 'yes'; ELSE tag = 'no'; PROC APPEND BASE=com.msa_matches DATA=com.msa_qualify2 force; /*new data set ‘matches’ will contain the matched controls*/ PROC SORT DATA=com.msa_qualify2 OUT=com.msa_temp1 (KEEP=enrolid); BY enrolid; PROC SORT DATA=com.msa_controls OUT=com.msa_temp2; BY enrolid; DATA com.msa_controls; /* controls already matched are removed and can not be matched again */ MERGE com.msa_temp1(IN=in1) com.msa_temp2(IN=in2); BY enrolid; IF in2 AND NOT in1; %MEND sample; DATA _NULL_; /*This data step calls the macro.*/ SET com.msa_caseout; CALL EXECUTE ('%sample('||dobyr||','||sex||','||count||','||region||')'); RUN; /*if you need to run the code anew, due to errors for instance, you may use this following step to remember to delete the following datasets. Otherwise, DO NOT RUN THIS DATA STEP, since it will delete your matches*/ /*proc datasets nolist library=work;*/ /* delete temp1 temp2 qualify1 qualify2 matches controls; */ /*run;*/ /* The next part is for testing if any of the cases have not received the required number of controls */ PROC FREQ NOPRINT DATA=com.msa_matches; TABLES case_dobyr*case_sex*case_region/OUT=com.msa_con_out; PROC SORT DATA = com.msa_caseout(RENAME= (dobyr=case_dobyr sex=case_sex count=case_cnt region=case_region enrolid=case_enrolid)); BY case_dobyr case_sex case_region; PROC SORT DATA = com.msa_con_out (RENAME= (count=con_cnt)); BY case_dobyr case_sex case_region; DATA com.msa_final (DROP=percent); MERGE com.msa_caseout com.msa_con_out; BY case_dobyr case_sex case_region; con_need = case_cnt*∶ IF con_cnt = . THEN con_cnt = 0; diff = con_cnt-con_need; PROC PRINT DATA = com.msa_final; /*creates a table showing characteristics of the cases who have not received enough matches and how many matches they are missing*/ WHERE diff < 0; TITLE 'Insufficient Matches'; RUN;
... View more