I have two datasets, each with an ID, START_TS, END_TS, and a Value. I want to merge these two and create a merged time intervals, and replace the original Value from dataset 1 with the Value from dataset 2. The logic should involve overlaying the values from dataset2 onto dataset1 where the time intervals from dataset2 exist, while maintaining the original dataset1 values for non-overlapping intervals.
data dataset1; format START_TS END_TS datetime20.; input OBS ID $ START_TS :datetime. END_TS :datetime. Value; datalines; 1 A 01JAN2025:00:00:00 02JAN2025:00:00:00 10 2 A 02JAN2025:00:00:00 03JAN2025:00:00:00 20 3 A 03JAN2025:00:00:00 04JAN2025:00:00:00 10 4 A 04JAN2025:00:00:00 05JAN2025:00:00:00 20 5 A 05JAN2025:00:00:00 06JAN2025:00:00:00 10 6 A 06JAN2025:00:00:00 07JAN2025:00:00:00 20 ; run;
data dataset2; format START_TS END_TS datetime20.; input OBS PT $ START_TS :datetime. END_TS :datetime. Value; datalines; 1 A 01JAN2025:00:00:00 01JAN2025:10:00:00 99 2 A 02JAN2025:08:00:00 02JAN2025:11:00:00 99 3 A 03JAN2025:22:00:00 04JAN2025:02:00:00 99 4 A 06JAN2025:03:00:00 06JAN2025:05:00:00 99 5 A 06JAN2025:23:00:00 07JAN2025:00:00:00 99 ; run;
Dataset wanted: ID PT START_TS END_TS Updated_Value 1 A 01JAN2025:00:00:00 01JAN2025:10:00:00 99 2 A 01JAN2025:10:00:00 02JAN2025:00:00:00 10 3 A 02JAN2025:00:00:00 02JAN2025:08:00:00 20 4 A 02JAN2025:08:00:00 02JAN2025:11:00:00 99 5 A 02JAN2025:11:00:00 03JAN2025:00:00:00 20 6 A 03JAN2025:00:00:00 03JAN2025:22:00:00 10 7 A 03JAN2025:22:00:00 04JAN2025:00:00:00 99 8 A 04JAN2025:00:00:00 04JAN2025:02:00:00 99 9 A 04JAN2025:02:00:00 05JAN2025:00:00:00 20 10 A 05JAN2025:00:00:00 06JAN2025:00:00:00 10 11 A 06JAN2025:00:00:00 06JAN2025:03:00:00 20 12 A 06JAN2025:03:00:00 06JAN2025:05:00:00 99 13 A 06JAN2025:05:00:00 06JAN2025:23:00:00 20 14 A 06JAN2025:23:00:00 07JAN2025:00:00:00 99 My code is below. It mostly works, but does not handle multiple time intervals on the same day from dataset 2 (OBS 4 and 5) correctly. Please help!
*Expand dataset1 into daily intervals;
data expanded_dataset2;
set dataset2;
format daily_start daily_end datetime20.;
/* Split dataset1 into daily intervals */
if not missing(START_TS) and not missing(END_TS) then do;
do date = DATEPART(START_TS) to DATEPART(END_TS);
daily_start = max(START_TS, dhms(date, 0, 0, 0));
daily_end = min(END_TS, dhms(date+1, 0, 0, 0));
output;
end;
end;
keep ID daily_start daily_end Value;
rename daily_start = START_TS daily_end = END_TS;
run;
*Merge and handle overlaps;
proc sql;
create table merged_intervals as
select a.ID,
a.START_TS as Base_START_TS,
a.END_TS as Base_END_TS,
b.START_TS as Overlay_START_TS,
b.END_TS as Overlay_END_TS,
a.Value as Base_Value,
b.Value as Overlay_Value
from dataset1 as a
left join expanded_dataset2 as b
on a.ID = b.ID and
b.END_TS > a.START_TS and
b.START_TS < a.END_TS
order by a.ID, a.START_TS, b.START_TS;
quit;
*Split and prioritize intervals;
data final_intervals;
set merged_intervals;
format START_TS END_TS datetime20.;
/* Interval 1: Before the overlap */
if Overlay_START_TS ^=. and Base_START_TS < Overlay_START_TS then do;
START_TS = Base_START_TS;
END_TS = min(Base_END_TS, Overlay_START_TS);
Value = Base_Value;
output;
end;
/* Interval 2: During the overlap */
if Overlay_START_TS ^=. then do;
START_TS = max(Base_START_TS, Overlay_START_TS);
END_TS = min(Base_END_TS, Overlay_END_TS);
Value = Overlay_Value;
output;
end;
/* Interval 3: After the overlap */
if Overlay_START_TS ^=. and Base_END_TS > Overlay_END_TS then do;
START_TS = Overlay_END_TS;
END_TS = Base_END_TS;
Value = Base_Value;
output;
end;
/* If no overlay exists, keep original interval */
if Overlay_START_TS =. then do;
START_TS = Base_START_TS;
END_TS = Base_END_TS;
Value = Base_Value;
output;
end;
run;
* Remove duplicates and sort;
proc sort data=final_intervals nodupkey;
by ID START_TS END_TS;
run;
... View more