data test1;
infile datalines delimiter = ',';
input ID START:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE mmddyy10. ;
datalines;
1, , , , , , ,
2, , , , , , ,
3,01/16/2019, 01/23/2019, 01/26/2019, 01/26/2019, 01/26/2019,01/26/2019,
;
Run;
data test2;
infile datalines delimiter = ',';
input ID START:mmddyy10. REVIEW:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEW REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE mmddyy10. ;
datalines;
1,01/17/2020,01/29/2020,01/30/2020, , , , ,
2,01/10/2020,01/24/2020, , , , , ,
3, , , , , , , ,
;
Run;
proc sql;
create table table3 as
select * from test1 as a inner join test2 as b
on a.id = b.id;
quit;
In SQL, you need to use the coalesce() function. Here is code with both (SQL and data step) solutions, and a check step to make sure both yield the same result:
data test1;
infile datalines delimiter = ',' dsd;
input ID START:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE yymmddd10. ;
datalines;
1, , , , , , ,
2, , , , , , ,
3,01/16/2019, 01/23/2019, 01/26/2019, 01/26/2019, 01/26/2019,01/26/2019,
;
data test2;
infile datalines delimiter = ',' dsd;
input ID START:mmddyy10. REVIEW:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEW REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE yymmddd10. ;
datalines;
1,01/17/2020,01/29/2020,01/30/2020, , , , ,
2,01/10/2020,01/24/2020, , , , , ,
3, , , , , , , ,
;
proc sql;
create table want1 as
select
t1.id,
coalesce(t1.start,t2.start) as start format=yymmddd10.,
t2.review,
coalesce(t1.reviewcomplete,t2.reviewcomplete) as reviewcomplete format=yymmddd10.,
coalesce(t1.healthdate,t2.healthdate) as healthdate format=yymmddd10.,
coalesce(t1.healthdateend,t2.healthdateend) as healthdateend format=yymmddd10.,
coalesce(t1.end,t2.end) as end format=yymmddd10.,
coalesce(t1.certifydate,t2.certifydate) as certifydate format=yymmddd10.
from test1 t1 full join test2 t2
on t1.id = t2.id;
quit;
data want2;
update test1 test2;
by id;
run;
proc compare base=want1 compare=want2;
run;
As you can see, the data step solution (as it is most often in SAS) is the most simple one.
Do NOT use the asterisk in sql joins, make an explicit list, and use the coalesce() function to overwrite only the missing values.
When I make an explicit list I get the following error. Also, I want to keep the missing dates where missing dates should be therefore I don't think the coalesce function is what I need.
proc sql;
create table table3 as
select ID, START,REVIEW, REVIEWCOMPLETE, HEALTHDATE, HEALTHDATEEND, END, CERTIFYDATE
from test1 as a inner join test2 as b
on a.id = b.id;
quit;
1. >Also, I want to keep the missing dates where missing dates should be therefore I don't think the coalesce function is what I need.
Coalesce does want you asked in your question.
What does missing dates should be mean?
2. A data step with an update statement might be what you want.
data TABLE3; update TABLE1 TABLE2; by ID; run;
In SQL, you need to use the coalesce() function. Here is code with both (SQL and data step) solutions, and a check step to make sure both yield the same result:
data test1;
infile datalines delimiter = ',' dsd;
input ID START:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE yymmddd10. ;
datalines;
1, , , , , , ,
2, , , , , , ,
3,01/16/2019, 01/23/2019, 01/26/2019, 01/26/2019, 01/26/2019,01/26/2019,
;
data test2;
infile datalines delimiter = ',' dsd;
input ID START:mmddyy10. REVIEW:mmddyy10. REVIEWCOMPLETE: mmddyy10. HEALTHDATE:mmddyy10. HEALTHDATEEND:mmddyy10.
END: mmddyy10. CERTIFYDATE: mmddyy10.;
format START REVIEW REVIEWCOMPLETE HEALTHDATE HEALTHDATEEND END CERTIFYDATE yymmddd10. ;
datalines;
1,01/17/2020,01/29/2020,01/30/2020, , , , ,
2,01/10/2020,01/24/2020, , , , , ,
3, , , , , , , ,
;
proc sql;
create table want1 as
select
t1.id,
coalesce(t1.start,t2.start) as start format=yymmddd10.,
t2.review,
coalesce(t1.reviewcomplete,t2.reviewcomplete) as reviewcomplete format=yymmddd10.,
coalesce(t1.healthdate,t2.healthdate) as healthdate format=yymmddd10.,
coalesce(t1.healthdateend,t2.healthdateend) as healthdateend format=yymmddd10.,
coalesce(t1.end,t2.end) as end format=yymmddd10.,
coalesce(t1.certifydate,t2.certifydate) as certifydate format=yymmddd10.
from test1 t1 full join test2 t2
on t1.id = t2.id;
quit;
data want2;
update test1 test2;
by id;
run;
proc compare base=want1 compare=want2;
run;
As you can see, the data step solution (as it is most often in SAS) is the most simple one.
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.