Exactly, this is one of the problems. I can have multiple values per join_id. I tried again to put t1.* and t2.* at the end of variable list or even after the case I still get some of the observations missing. proc sql;
create table data3 as select
coalesce(t1.join_id, t2.join_id) as join_id, t1.*, t2.*
case
when not missing (t1.join_id) and not missing (t2.join_id) then 3
when missing (t1.join_id) and not missing (t2.join_id) then 2
when not missing (t1.join_id) and missing (t2.join_id) then 1 else 0
end as join_stat,
from data1 full join data2 on t1.join_id=t2.join_id;
quit; or proc sql;
create table data3 as select
coalesce(t1.join_id, t2.join_id) as join_id,
case
when not missing (t1.join_id) and not missing (t2.join_id) then 3
when missing (t1.join_id) and not missing (t2.join_id) then 2
when not missing (t1.join_id) and missing (t2.join_id) then 1 else 0
end as join_stat,
t1.*, t2.*
from data1 full join data2 on t1.join_id=t2.join_id;
quit;
... View more