Name | ID | Start_date | End_date |
TPSON | 1 | 01MAY2017:00:00:00 | . |
TPSON | 1 | 01MAR2016:00:00:00 | 31DEC2016:00:00:00 |
TPSON | 1 | 14NOV2014:00:00:00 | . |
RED | 2 | 01MAY2015:00:00:00 | . |
RED | 2 | 01DEC2005:00:00:00 | 30APR2015:00:00:00 |
RICH | 3 | 01NOV2018:00:00:00 | . |
RICH | 3 | 01JAN2018:00:00:00 | 31DEC2018:00:00:00 |
VOR | 4 | 01APR2016:00:00:00 | . |
VOR | 4 | 01OCT2018:00:00:00 | 31OCT2018:00:00:00 |
Fan | 5 | 07JUL2007:00:00:00 | 19OCT2015:00:00:00 |
VIX | 6 | 20AUG2018:00:00:00 | 31DEC2018:00:00:00 |
I have the data set above where members have dates associated to them. members with multiple dates i want to keep the max start_date and the end_date that is empty and also keep the members with both start and end date
output:
Name | ID | Start_date | End_date |
TPSON | 1 | 01MAY2017:00:00:00 | . |
RED | 2 | 01MAY2015:00:00:00 | . |
RICH | 3 | 01NOV2018:00:00:00 | . |
VOR | 4 | 01APR2016:00:00:00 | . |
Fan | 5 | 07JUL2007:00:00:00 | 19OCT2015:00:00:00 |
VIX | 6 | 20AUG2018:00:00:00 | 31DEC2018:00:00:00 |
Assuming i understand your requirement-->
data have;
input Name $ ID Start_date :datetime20. End_date :datetime20.;
format start_date end_Date datetime20.;
cards;
TPSON 1 01MAY2017:00:00:00 .
TPSON 1 01MAR2016:00:00:00 31DEC2016:00:00:00
TPSON 1 14NOV2014:00:00:00 .
RED 2 01MAY2015:00:00:00 .
RED 2 01DEC2005:00:00:00 30APR2015:00:00:00
RICH 3 01NOV2018:00:00:00 .
RICH 3 01JAN2018:00:00:00 31DEC2018:00:00:00
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
Fan 5 07JUL2007:00:00:00 19OCT2015:00:00:00
VIX 6 20AUG2018:00:00:00 31DEC2018:00:00:00
;
proc sql;
create table want as
select *
from
(select Name,id, case when start_date=max(start_date) and end_date=. then start_date
when start_date=max(start_date) and end_date>. and count(*)>1 then .
else start_date
end as start_date format=datetime20.,end_date
from have
group by id,name)
group by id,name
having start_date=max(start_date)
order by id,name;
quit;
Data have;
Input Name $ ID Start_date: datetime26. End_date: datetime26.;
format Start_date End_date datetime26.;
cards;
TPSON 1 01MAY2017:00:00:00 .
TPSON 1 01MAR2016:00:00:00 31DEC2016:00:00:00
TPSON 1 14NOV2014:00:00:00 .
RED 2 01MAY2015:00:00:00 .
RED 2 01DEC2005:00:00:00 30APR2015:00:00:00
RICH 3 01NOV2018:00:00:00 .
RICH 3 01JAN2018:00:00:00 31DEC2018:00:00:00
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
Fan 5 07JUL2007:00:00:00 19OCT2015:00:00:00
VIX 6 20AUG2018:00:00:00 31DEC2018:00:00:00
run;
Proc sort data=have ;
by ID descending Start_date End_date;
run;
Data want;
set have;
by ID;
if first.id;
run;
thanks for replying however for this data point i dont want the max start date i want the date with empty end date
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
and you code i picking the max start date
Assuming i understand your requirement-->
data have;
input Name $ ID Start_date :datetime20. End_date :datetime20.;
format start_date end_Date datetime20.;
cards;
TPSON 1 01MAY2017:00:00:00 .
TPSON 1 01MAR2016:00:00:00 31DEC2016:00:00:00
TPSON 1 14NOV2014:00:00:00 .
RED 2 01MAY2015:00:00:00 .
RED 2 01DEC2005:00:00:00 30APR2015:00:00:00
RICH 3 01NOV2018:00:00:00 .
RICH 3 01JAN2018:00:00:00 31DEC2018:00:00:00
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
Fan 5 07JUL2007:00:00:00 19OCT2015:00:00:00
VIX 6 20AUG2018:00:00:00 31DEC2018:00:00:00
;
proc sql;
create table want as
select *
from
(select Name,id, case when start_date=max(start_date) and end_date=. then start_date
when start_date=max(start_date) and end_date>. and count(*)>1 then .
else start_date
end as start_date format=datetime20.,end_date
from have
group by id,name)
group by id,name
having start_date=max(start_date)
order by id,name;
quit;
Hi @hk2013, The proc sql approach I gave you should work and the reason I took that approach was I noticed @r_behata had already responded with a datastep, to have some distinction. If you are not comfortable with proc sql, and want only datastep, let us know
EDIT: Also, Please clarify, should the maxdate of startdate record had non missing enddate, is it ok to assume the next maxdate would have a missing enddate as your sample suggests? If not, the logic would have to scan until it finds a record with end_Date=. and the highest record of startdate?
Either way, with this clarification, the coding is less than a minute work
This is much simpler to understand in my opinion
data have;
input Name $ ID Start_date :datetime20. End_date :datetime20.;
format start_date end_Date datetime20.;
cards;
TPSON 1 01MAY2017:00:00:00 .
TPSON 1 01MAR2016:00:00:00 31DEC2016:00:00:00
TPSON 1 14NOV2014:00:00:00 .
RED 2 01MAY2015:00:00:00 .
RED 2 01DEC2005:00:00:00 30APR2015:00:00:00
RICH 3 01NOV2018:00:00:00 .
RICH 3 01JAN2018:00:00:00 31DEC2018:00:00:00
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
Fan 5 07JUL2007:00:00:00 19OCT2015:00:00:00
VIX 6 20AUG2018:00:00:00 31DEC2018:00:00:00
;
proc sql;
create table want as
select *
from have
where end_date=.
group by id
having max(start_date)=start_date
union all
select *
from have
where id not in (select id from have where end_date=.)
group by id
having max(start_date)=start_date
order by id;
quit;
data have;
input Name $ ID Start_date :datetime20. End_date :datetime20.;
format start_date end_Date datetime20.;
cards;
TPSON 1 01MAY2017:00:00:00 .
TPSON 1 01MAR2016:00:00:00 31DEC2016:00:00:00
TPSON 1 14NOV2014:00:00:00 .
RED 2 01MAY2015:00:00:00 .
RED 2 01DEC2005:00:00:00 30APR2015:00:00:00
RICH 3 01NOV2018:00:00:00 .
RICH 3 01JAN2018:00:00:00 31DEC2018:00:00:00
VOR 4 01APR2016:00:00:00 .
VOR 4 01OCT2018:00:00:00 31OCT2018:00:00:00
Fan 5 07JUL2007:00:00:00 19OCT2015:00:00:00
VIX 6 20AUG2018:00:00:00 31DEC2018:00:00:00
;
proc sort data=have out=_have;
by id end_date descending Start_date ;
run;
data want;
do until(last.id);
merge _have(in=a) _have(drop=start_date where=(_End_date=.) rename=(End_date=_End_date) in=b);
by id;
if a and not b then _m=max(_m,Start_date);
end;
do until(last.id);
set _have;
by id;
if first.id and end_date=. or _m=start_date then output;
end;
drop _:;
run;
Registration is now open for SAS Innovate 2025 , our biggest and most exciting global event of the year! Join us in Orlando, FL, May 6-9.
Sign up by Dec. 31 to get the 2024 rate of just $495.
Register now!
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.