Suppose I have a dataset as follows with two variables, one group_id variable and a date variable:
data id_dates;
input group_id $ date :mmddyy10.;
format date mmddyy10.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
I want to find the max number of records within any 1 day period, 2 day period, and any 7 day period, given the dates are from within 60 days from the latest date, grouped by the group_id. My output from the example dataset should be:
group_id 1_day_max 2_day_max 1_week_max max_date
2A 3 3 4 05/10/2020
2B 4 5 6 01/24/2019
I can get the "max_date" appended, but I'm having trouble with the 2 day/1 week max variables. For finding the 1 day max, I did it in a few steps I believe but there might be a cleaner way if anyone has input or notices any issues.
proc sql;
create table max_dates as select group_id, max(date) format mmddyy10. as max_date from id_dates group by group_id;
create table by_date as select a.group_id,count(a.date) as num from id_dates a join max_dates b on a.group_id=b.group_id where date > intnx('day', max_date, -60) group by a.group_id, date;
create table max as select group_id,max(num) as max_num from by_date group by group_id;
quit;
Below what I could come-up with.
data have;
input group_id $ date :mmddyy10.;
format date date9.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
proc sql;
create table inter as
select
l.group_id,
l.date,
r.max_date,
l.cnt_1day
from
(
select
group_id,
date,
count(*) as cnt_1day
from have
group by group_id, date
) l
inner join
(
select
group_id,
max(date) as max_date format=date9.
from have
group by group_id
) r
on
l.group_id=r.group_id and r.max_date-l.date between 0 and 60
order by group_id, date
;
quit;
data want;
set inter;
by group_id date;
array _dt_ {7} 8 _temporary_;
array _cnt_ {7} 8 _temporary_;
/* array _dt_ {7} 8 ;*/
/* array _cnt_ {7} 8 ;*/
/* retain _dt_ _cnt_;*/
if first.group_id then call missing(of _dt_[*], of _cnt_[*]);
_k=mod(_n_,7)+1;
_dt_[_k] =date;
_cnt_[_k] =cnt_1day;
do _i=1 to dim(_dt_);
if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
end;
output;
call missing(of cnt_:);
drop _k _i;
run;
proc sql;
select
group_id,
max(cnt_1day) as cnt_1day,
max(cnt_2day) as cnt_2day,
max(cnt_7day) as cnt_7day
from want
group by group_id
;
quit;
Below what I could come-up with.
data have;
input group_id $ date :mmddyy10.;
format date date9.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
proc sql;
create table inter as
select
l.group_id,
l.date,
r.max_date,
l.cnt_1day
from
(
select
group_id,
date,
count(*) as cnt_1day
from have
group by group_id, date
) l
inner join
(
select
group_id,
max(date) as max_date format=date9.
from have
group by group_id
) r
on
l.group_id=r.group_id and r.max_date-l.date between 0 and 60
order by group_id, date
;
quit;
data want;
set inter;
by group_id date;
array _dt_ {7} 8 _temporary_;
array _cnt_ {7} 8 _temporary_;
/* array _dt_ {7} 8 ;*/
/* array _cnt_ {7} 8 ;*/
/* retain _dt_ _cnt_;*/
if first.group_id then call missing(of _dt_[*], of _cnt_[*]);
_k=mod(_n_,7)+1;
_dt_[_k] =date;
_cnt_[_k] =cnt_1day;
do _i=1 to dim(_dt_);
if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
end;
output;
call missing(of cnt_:);
drop _k _i;
run;
proc sql;
select
group_id,
max(cnt_1day) as cnt_1day,
max(cnt_2day) as cnt_2day,
max(cnt_7day) as cnt_7day
from want
group by group_id
;
quit;
Build your skills. Make connections. Enjoy creative freedom. Maybe change the world. Registration is now open through August 30th. Visit the SAS Hackathon homepage.
Register today!Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Select SAS Training centers are offering in-person courses. View upcoming courses for: