Hi @monday89
We are almost there
I have added a new section at the very end of the program to manage 'duplicates' according to your request:
data have;
infile datalines dlm="09"x;
input id time_start:e8601dt16. time_stop:e8601dt16. measure1 measure2;
format time_start time_stop e8601dt16.;
datalines;
1 2020-01-02T00:00 2020-01-02T21:12 1 3.4
1 2020-01-02T10:20 2020-01-02T10:45 2 4.2
;
run;
/* Build the 'structure' of the table: expand date times to avoid overlapping */
proc sql;
create table bound as
select id, time_start from have
union
select id, time_stop from have;
quit;
data structure;
set bound;
set bound (firstobs=2 rename=(id=id2 time_start=time_stop));
if id ne id2 then delete;
drop id2;
run;
/* Merge the structure with the initial dataset to retrieve measure1 and measure2 */
proc sql;
create table want_all as
select a.*, b.measure1, b.measure2
from structure as a inner join
have as b
on a.id=b.id and
a.time_start >= b.time_start and
a.time_stop <= b.time_stop
order by a.id, a.time_start, a.time_stop;
quit;
/* Manage "duplicate" records */
/* Part1 of the table : no duplicates */
proc sql;
create table want_all_flag_unique as
select *
from want_all
group by id, time_start, time_stop
having count(*) = 1;
quit;
/* Part2 of the table : duplicates -> keep the original record */
proc sql;
create table want_all_flag_duplicates as
select * from have
intersect all
select * from want_all group by id, time_start, time_stop having count(*) > 1;
quit;
/* Merge part1 and part2 */
proc sql;
create table want_unique as
select * from want_all_flag_unique
union all
select * from want_all_flag_duplicates
order by id, time_start, time_stop;
quit;
All the best,
... View more