I have a dataset which contains many events that contains clients records with start and end dates.
Whole events should be those from clients where there are intersecting dates from the same suppliers.
data have;
input event:$ client:$ supplier:$ start:date9. finish:date9. spend @@@@@;
format start date9. finish date9.;
datalines;
001 x1 SU1 01JAN2022 10JAN2022 10
002 y2 SU3 21DEC2021 22DEC2021 50
003 x1 SU1 05JAN2022 10JAN2022 5
004 x1 SU1 31DEC2021 11JAN2022 10
005 z3 SU2 01JAN2022 05JAN2022 15
006 x1 SU1 13JAN2022 15JAN2022 6
;
run;
This is the data I want.
For example client x1 has 3 events that intersect dates and one that does not.
I also need to sum the spend and also make a note in the event ID that I've combined multiple events into one.
data have;
input event:$ client:$ supplier:$ start:date9. finish:date9. spend @@@@@;
format start date9. finish date9.;
datalines;
001 x1 SU1 01JAN2022 10JAN2022 10
002 y2 SU3 21DEC2021 22DEC2021 50
003 x1 SU1 05JAN2022 10JAN2022 5
004 x1 SU1 31DEC2021 11JAN2022 10
005 z3 SU2 01JAN2022 05JAN2022 15
006 x1 SU1 13JAN2022 15JAN2022 6
;
run;
data temp;
set have;
do date=start to finish;
output;
end;
format date date9.;
drop start finish;
run;
proc sort data=temp;by client supplier date;run;
data temp2;
sum=0;
do until(last.date);
set temp;
by client supplier date;
length events $ 200;
events=catx(',',events,event);
sum+spend;
end;
drop event spend;
run;
data temp3;
set temp2;
by client supplier;
if first.supplier or dif(date) ne 1 then group+1;
run;
proc sql;
create table want as
select group,
(select distinct events from temp3 where group=a.group having length(events)=max(length(events))) as event,
max(client) as client,max(supplier) as supplier,
min(date) as start format=date9.,max(date) as finish format=date9.,max(sum) as spend
from temp3 as a
group by group;
quit;
data have;
input event:$ client:$ supplier:$ start:date9. finish:date9. spend @@@@@;
format start date9. finish date9.;
datalines;
001 x1 SU1 01JAN2022 10JAN2022 10
002 y2 SU3 21DEC2021 22DEC2021 50
003 x1 SU1 05JAN2022 10JAN2022 5
004 x1 SU1 31DEC2021 11JAN2022 10
005 z3 SU2 01JAN2022 05JAN2022 15
006 x1 SU1 13JAN2022 15JAN2022 6
;
run;
data temp;
set have;
do date=start to finish;
output;
end;
format date date9.;
drop start finish;
run;
proc sort data=temp;by client supplier date;run;
data temp2;
sum=0;
do until(last.date);
set temp;
by client supplier date;
length events $ 200;
events=catx(',',events,event);
sum+spend;
end;
drop event spend;
run;
data temp3;
set temp2;
by client supplier;
if first.supplier or dif(date) ne 1 then group+1;
run;
proc sql;
create table want as
select group,
(select distinct events from temp3 where group=a.group having length(events)=max(length(events))) as event,
max(client) as client,max(supplier) as supplier,
min(date) as start format=date9.,max(date) as finish format=date9.,max(sum) as spend
from temp3 as a
group by group;
quit;
What if a pair of date ranges don't intersect, but are contiguos? I.e. what if you have these two start/finish pairs?
Start | Finish |
05jan2022 | 12jan2022 |
13jan2022 | 20jan2022 |
Should these remain as two distinct events, or should they be joined as a single event from 05jan2022 through 20jan2022?
What is your naming convention for merged events?
Seems to do want you want, but needs tweaking depending on your answers to the questions asked by @mkeintz.
proc sort data=have out=sorted;
by client start;
run;
data want;
set sorted;
by client;
length first_event $ 3;
retain first_start first_finish first_event;
format first_start first_finish date9.;
if first.client then do;
sum_spend = 0;
first_start = start;
first_finish = finish;
first_event = event;
end;
if event < first_event then do;
first_event = event;
end;
if start < first_finish then do;
sum_spend + spend;
end;
else do;
substr(first_event, 1, 1) = 'M';
output;
sum_spend = spend;
first_start = start;
first_event = event;
end;
if last.client then do;
first_finish = finish;
output;
end;
drop event start finish spend;
rename
first_event = event
first_start = start
first_finish = finish
sum_spend = spend
;
run;
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.