BookmarkSubscribeRSS Feed
monday89
Fluorite | Level 6

I have the following dataset with overlapping time constraints

id time_start time_stop measure1 measure2
1 1/2/2020 0:00 1/2/2020 21:12 1 3.4
1 1/2/2020 10:20 1/2/2020 10:45 2 4.2
         

 

Since one time frame is a subset of longer time frame, i need to divide them to something like this: 

 

id time_start time_stop measure1 measure2
1 1/2/2020 0:00 1/2/2020 10:20 1 3.4
1 1/2/2020 10:20 1/2/2020 10:45 2 4.2
1 1/2/2020 10:45 1/2/2020 21:12 1 3.4

 

I am not sure how to start this but this is my attempt

 

data work.two;

      merge work.one

work.one(keep=id time_start measure1 measure2  

firstobs=2

                                rename=(id=_id time_start=_start_date));

 

      format new_stop_date datetime20.;

      if id=_id and (_start_date < time_stop and time_stop > time_start)

then new_stop_date=_start_date;

      else new_stop_date=time_stop;

 

      rename time_start=new_start_date;

      drop time_stop _:;

run;

3 REPLIES 3
ed_sas_member
Meteorite | Level 14

Hi @monday89 

 

Here is an attempt to achieve this:

data have;
	infile datalines dlm="09"x;
	input id time_start:e8601dt16. time_stop:e8601dt16. measure1 measure2;
	format time_start time_stop e8601dt16.;
	datalines;
1	2020-01-02T00:00	2020-01-02T21:12	1	3.4
1	2020-01-02T10:20	2020-01-02T10:45	2	4.2
;
run;

/* Build the 'structure' of the table: expand date times to avoid overlapping */
proc sql;
	create table bound as
	select id, time_start from have
	union
	select id, time_stop from have;
quit;

data structure;
	set bound;
	set bound (firstobs=2 rename=(id=id2 time_start=time_stop));
	if id ne id2 then delete;
	drop id2;
run;

/* Merge the structure with the initial dataset to retrieve measure1 and measure2 */
proc sql;
	create table want as
	select a.*, b.measure1, measure2
	from structure as a inner join
		 have as b
	on a.id=b.id and
	   a.time_start >= b.time_start and
	   a.time_stop <= b.time_stop
	order by a.id, a.time_start, a.time_stop;
quit;

Capture d’écran 2020-03-03 à 20.17.19.png

 

-> Please note that there are two records for the range 10:20-10:45, so as to keep all information from your dataset. Indeed, the measures related to the first observation occurred between 00:00 and 21:12, so it includes the interval 10:20-10:45. Why not keeping this information? If you want to keep only one, what is the rule to prioritize one or the other?

 

Best,

monday89
Fluorite | Level 6

@ed_sas_member Thanks this works! I would like to keep the only one row 10:15 - 10:21 with corresponding SAME measure1 and measure2. How do I restrict that?

ed_sas_member
Meteorite | Level 14

Hi @monday89 

 

We are almost there Smiley Happy

 

I have added a new section at the very end of the program to manage 'duplicates' according to your request:

data have;
	infile datalines dlm="09"x;
	input id time_start:e8601dt16. time_stop:e8601dt16. measure1 measure2;
	format time_start time_stop e8601dt16.;
	datalines;
1	2020-01-02T00:00	2020-01-02T21:12	1	3.4
1	2020-01-02T10:20	2020-01-02T10:45	2	4.2
;
run;

/* Build the 'structure' of the table: expand date times to avoid overlapping */
proc sql;
	create table bound as
	select id, time_start from have
	union
	select id, time_stop from have;
quit;

data structure;
	set bound;
	set bound (firstobs=2 rename=(id=id2 time_start=time_stop));
	if id ne id2 then delete;
	drop id2;
run;

/* Merge the structure with the initial dataset to retrieve measure1 and measure2 */
proc sql;
	create table want_all as
	select a.*, b.measure1, b.measure2
	from structure as a inner join
		 have as b
	on a.id=b.id and
	   a.time_start >= b.time_start and
	   a.time_stop <= b.time_stop
	order by a.id, a.time_start, a.time_stop;
quit;

/* Manage "duplicate" records */
	
	/* Part1 of the table : no duplicates */
proc sql;
	create table want_all_flag_unique as
	select *
	from want_all
	group by id, time_start, time_stop
	having count(*) = 1;
quit;

	/* Part2 of the table : duplicates -> keep the original record */
proc sql;
	create table want_all_flag_duplicates as
	select * from have
	intersect all
	select * from want_all group by id, time_start, time_stop having count(*) > 1;
quit;

	/* Merge part1 and part2 */

proc sql;
	create table want_unique as
	select * from want_all_flag_unique
	union all
	select * from want_all_flag_duplicates
	order by id, time_start, time_stop;
quit;

Capture d’écran 2020-03-04 à 09.22.07.png

 

All the best,

 

 

 

 

hackathon24-white-horiz.png

2025 SAS Hackathon: There is still time!

Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!

Register Now

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 3 replies
  • 974 views
  • 1 like
  • 2 in conversation