Given data have;
infile datalines missover delimiter="|" dsd;
input id :$20. (start_date end_date) (:date9.) (attribute_1 attribute_2 attribute_3 attribute_4) ($);
format start_date end_date date9.;
datalines; ID1|01MAR2014|31DEC9999|BIG|YES||
ID2|01SEP2015|30NOV2020|||TWO|
ID2|01SEP2015|31DEC9999|SMALL|||
ID2|01AUG2021|31DEC9999|||TWO|
ID3|01DEC2014|31MAY2016||YES||
ID3|01DEC2014|29JUN2017||||OK
ID3|01DEC2014|31DEC9999|MEDIUM|||
ID3|31MAR2015|29SEP2017|||ONE|
ID3|30JUN2017|31DEC9999||YES||TBD
ID3|30SEP2017|31DEC9999|||ONE, TWO|
; I would like to get continuous validity ranges for each id with the correct attributes at each of them. The desired output would be like this: +-----+------------+-----------+-------------+-------------+-------------+-------------+
| id | start_date | end_date | attribute_1 | attribute_2 | attribute_3 | attribute_4 |
+-----+------------+-----------+-------------+-------------+-------------+-------------+
| ID1 | 01MAR2014 | 31DEC9999 | BIG | YES | | |
| ID2 | 01SEP2015 | 30NOV2020 | SMALL | | TWO | |
| ID2 | 01DEC2020 | 31JUL2021 | SMALL | | | |
| ID2 | 01AUG2021 | 31DEC9999 | SMALL | | TWO | |
| ID3 | 01DEC2014 | 30MAR2015 | MEDIUM | YES | | OK |
| ID3 | 31MAR2015 | 31MAY2016 | MEDIUM | YES | ONE | OK |
| ID3 | 01JUN2016 | 29JUN2016 | MEDIUM | | ONE | OK |
| ID3 | 30JUN2016 | 29SEP2017 | MEDIUM | YES | ONE | TBD |
| ID3 | 30SEP2017 | 31DEC9999 | MEDIUM | YES | ONE, TWO | TBD |
+-----+------------+-----------+-------------+-------------+-------------+-------------+ I found a way of doing it using joins, but would like to know if there exist a better way of doing the following: data all_intervals;
set have(keep= id start_date end_date);
_start = start_date; output;
_end = start_date-1; output;
_end = end_date; output;
if end_date < '31DEC9999'd then do;
_start = end_date+1; output;
end;
run;
proc sql;
create table all_intervals as
select distinct t1.id, t1._start, t2._end
from all_intervals t1, all_intervals t2
where t1.id = t2.id and t2._end > t1._start
;
quit;
data all_intervals;
set all_intervals;
by id _start;
if first.id or first._start;
run;
proc sql noprint;
select 'max(t1.'||NAME||') as '||NAME into :attributes separated by ','
from sashelp.vcolumn
where libname = "WORK" and memname = "HAVE" and upcase(name) not in ('ID', "_START", "_END")
;
quit;
proc sql;
create table merge as
select t2.id, t2._start as start_date, t2._end as end_date, &attributes.
from have t1 right join all_intervals t2
on t1.id = t2.id
and ((t2._start <= t1.start_date <= t2._end)
or (t2._start <= t1.end_date <= t2._end)
or (t2._start >= t1.start_date and t2._end <= t1.end_date))
group by t2.id, t2._start
order by t2.id, t2._start
;
quit;
proc sort data=merge out=want nodupkey; by id start_date end_date; run; The above produce the expected output.
... View more