/* Importing the Reference Table */
data reference_table;
input ID_key Minutes Hospital $;
datalines;
1 4 A
2 1 B
3 8 A
4 6 C
5 9 F
;
run;
/* Importing the Reference Table */
data table2;
input ID_key score$;
datalines;
ID key Score
1
1
1 3
2 2
3
4 1
4
5
5
;
run;
data table3;
input ID_key score2$;
datalines;
ID key Score 2
1 Positive
2 N/A
2 Positive
2 N/A
3 Negative
3
3 Negative
4
4
5 Non-conclusive
5 Non-conclusive
run;
/* Step 1: Flagging numeric variable in Table 2 */
data table2_flagged;
set table2;
if not missing(Score) then Score_Flag = 1;
else Score_Flag = 0;
run;
/* Step 2: Flagging categorical variable in Table 3 */
data table3_flagged;
set table3;
if not missing(Score2) then Score2_Flag = 1;
else Score2_Flag = 0;
run;
/* Step 3: Aggregating flags for Table 2 */
proc sql;
create table table2_agg as
select ID_key, max(Score_Flag) as Score
from table2_flagged
group by ID_key;
quit;
/* Step 4: Aggregating flags for Table 3 */
proc sql;
create table table3_agg as
select ID_key, max(Score2_Flag) as Score2
from table3_flagged
group by ID_key;
quit;
/* Step 5: Merging all tables */
proc sql;
create table final_table as
select ref.ID_key, ref.Minutes, ref.Hospital,
coalesce(t2.Score, 0) as Score, /* Default to 0 if missing */
coalesce(t3.Score2, 0) as Score2 /* Default to 0 if missing */
from reference_table as ref
left join table2_agg as t2 on ref.ID_key = t2.ID_key
left join table3_agg as t3 on ref.ID_key = t3.ID_key;
quit;
/* Step 6: Adding dependent flags (example) */
data final_table;
set final_table;
if Score = 1 and Score2 = 1 then Valid_Flag = 1;
else Valid_Flag = 0;
run; To tackle this problem, you can approach it step by step by: Creating flags in individual tables for numeric and categorical variables. Merging the tables carefully to ensure the duplication issue doesn't inflate the number of records. Combining the results to achieve your desired "want" table with the valid records marked by flags.
... View more