Re: time constraint overlap

monday89 · Posted 03-03-2020 08:55 AM

I have the following dataset with overlapping time constraints

id	time_start	time_stop	measure1	measure2
1	1/2/2020 0:00	1/2/2020 21:12	1	3.4
1	1/2/2020 10:20	1/2/2020 10:45	2	4.2

Since one time frame is a subset of longer time frame, i need to divide them to something like this:

id	time_start	time_stop	measure1	measure2
1	1/2/2020 0:00	1/2/2020 10:20	1	3.4
1	1/2/2020 10:20	1/2/2020 10:45	2	4.2
1	1/2/2020 10:45	1/2/2020 21:12	1	3.4

I am not sure how to start this but this is my attempt

data work.two;

merge work.one

work.one(keep=id time_start measure1 measure2

firstobs=2

rename=(id=_id time_start=_start_date));

format new_stop_date datetime20.;

if id=_id and (_start_date < time_stop and time_stop > time_start)

then new_stop_date=_start_date;

else new_stop_date=time_stop;

rename time_start=new_start_date;

drop time_stop _:;

run;

ed_sas_member · Posted 03-03-2020 02:22 PM

Hi @monday89

Here is an attempt to achieve this:

data have;
	infile datalines dlm="09"x;
	input id time_start:e8601dt16. time_stop:e8601dt16. measure1 measure2;
	format time_start time_stop e8601dt16.;
	datalines;
1	2020-01-02T00:00	2020-01-02T21:12	1	3.4
1	2020-01-02T10:20	2020-01-02T10:45	2	4.2
;
run;

/* Build the 'structure' of the table: expand date times to avoid overlapping */
proc sql;
	create table bound as
	select id, time_start from have
	union
	select id, time_stop from have;
quit;

data structure;
	set bound;
	set bound (firstobs=2 rename=(id=id2 time_start=time_stop));
	if id ne id2 then delete;
	drop id2;
run;

/* Merge the structure with the initial dataset to retrieve measure1 and measure2 */
proc sql;
	create table want as
	select a.*, b.measure1, measure2
	from structure as a inner join
		 have as b
	on a.id=b.id and
	   a.time_start >= b.time_start and
	   a.time_stop <= b.time_stop
	order by a.id, a.time_start, a.time_stop;
quit;

-> Please note that there are two records for the range 10:20-10:45, so as to keep all information from your dataset. Indeed, the measures related to the first observation occurred between 00:00 and 21:12, so it includes the interval 10:20-10:45. Why not keeping this information? If you want to keep only one, what is the rule to prioritize one or the other?

Best,

monday89 · Posted 03-03-2020 03:16 PM

@ed_sas_member Thanks this works! I would like to keep the only one row 10:15 - 10:21 with corresponding SAME measure1 and measure2. How do I restrict that?

ed_sas_member · Posted 03-04-2020 03:28 AM

Hi @monday89

We are almost there

I have added a new section at the very end of the program to manage 'duplicates' according to your request:

data have;
	infile datalines dlm="09"x;
	input id time_start:e8601dt16. time_stop:e8601dt16. measure1 measure2;
	format time_start time_stop e8601dt16.;
	datalines;
1	2020-01-02T00:00	2020-01-02T21:12	1	3.4
1	2020-01-02T10:20	2020-01-02T10:45	2	4.2
;
run;

/* Build the 'structure' of the table: expand date times to avoid overlapping */
proc sql;
	create table bound as
	select id, time_start from have
	union
	select id, time_stop from have;
quit;

data structure;
	set bound;
	set bound (firstobs=2 rename=(id=id2 time_start=time_stop));
	if id ne id2 then delete;
	drop id2;
run;

/* Merge the structure with the initial dataset to retrieve measure1 and measure2 */
proc sql;
	create table want_all as
	select a.*, b.measure1, b.measure2
	from structure as a inner join
		 have as b
	on a.id=b.id and
	   a.time_start >= b.time_start and
	   a.time_stop <= b.time_stop
	order by a.id, a.time_start, a.time_stop;
quit;

/* Manage "duplicate" records */
	
	/* Part1 of the table : no duplicates */
proc sql;
	create table want_all_flag_unique as
	select *
	from want_all
	group by id, time_start, time_stop
	having count(*) = 1;
quit;

	/* Part2 of the table : duplicates -> keep the original record */
proc sql;
	create table want_all_flag_duplicates as
	select * from have
	intersect all
	select * from want_all group by id, time_start, time_stop having count(*) > 1;
quit;

	/* Merge part1 and part2 */

proc sql;
	create table want_unique as
	select * from want_all_flag_unique
	union all
	select * from want_all_flag_duplicates
	order by id, time_start, time_stop;
quit;

All the best,

time constraint overlap