Suppose I have a dataset as follows with two variables, one group_id variable and a date variable:
data id_dates;
input group_id $ date :mmddyy10.;
format date mmddyy10.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
I want to find the max number of records within any 1 day period, 2 day period, and any 7 day period, given the dates are from within 60 days from the latest date, grouped by the group_id. My output from the example dataset should be:
group_id 1_day_max 2_day_max 1_week_max max_date
2A 3 3 4 05/10/2020
2B 4 5 6 01/24/2019
I can get the "max_date" appended, but I'm having trouble with the 2 day/1 week max variables. For finding the 1 day max, I did it in a few steps I believe but there might be a cleaner way if anyone has input or notices any issues.
proc sql;
create table max_dates as select group_id, max(date) format mmddyy10. as max_date from id_dates group by group_id;
create table by_date as select a.group_id,count(a.date) as num from id_dates a join max_dates b on a.group_id=b.group_id where date > intnx('day', max_date, -60) group by a.group_id, date;
create table max as select group_id,max(num) as max_num from by_date group by group_id;
quit;
Below what I could come-up with.
data have;
input group_id $ date :mmddyy10.;
format date date9.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
proc sql;
create table inter as
select
l.group_id,
l.date,
r.max_date,
l.cnt_1day
from
(
select
group_id,
date,
count(*) as cnt_1day
from have
group by group_id, date
) l
inner join
(
select
group_id,
max(date) as max_date format=date9.
from have
group by group_id
) r
on
l.group_id=r.group_id and r.max_date-l.date between 0 and 60
order by group_id, date
;
quit;
data want;
set inter;
by group_id date;
array _dt_ {7} 8 _temporary_;
array _cnt_ {7} 8 _temporary_;
/* array _dt_ {7} 8 ;*/
/* array _cnt_ {7} 8 ;*/
/* retain _dt_ _cnt_;*/
if first.group_id then call missing(of _dt_[*], of _cnt_[*]);
_k=mod(_n_,7)+1;
_dt_[_k] =date;
_cnt_[_k] =cnt_1day;
do _i=1 to dim(_dt_);
if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
end;
output;
call missing(of cnt_:);
drop _k _i;
run;
proc sql;
select
group_id,
max(cnt_1day) as cnt_1day,
max(cnt_2day) as cnt_2day,
max(cnt_7day) as cnt_7day
from want
group by group_id
;
quit;
Below what I could come-up with.
data have;
input group_id $ date :mmddyy10.;
format date date9.;
datalines;
2A 01/04/2020
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020
2A 04/07/2020
2A 04/10/2020
2A 04/10/2020
2A 04/10/2020
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018
2B 01/23/2019
2B 12/18/2018
2B 12/23/2018
2B 12/19/2018
2B 08/05/2018
2B 11/19/2018
2B 01/01/2019
;
proc sql;
create table inter as
select
l.group_id,
l.date,
r.max_date,
l.cnt_1day
from
(
select
group_id,
date,
count(*) as cnt_1day
from have
group by group_id, date
) l
inner join
(
select
group_id,
max(date) as max_date format=date9.
from have
group by group_id
) r
on
l.group_id=r.group_id and r.max_date-l.date between 0 and 60
order by group_id, date
;
quit;
data want;
set inter;
by group_id date;
array _dt_ {7} 8 _temporary_;
array _cnt_ {7} 8 _temporary_;
/* array _dt_ {7} 8 ;*/
/* array _cnt_ {7} 8 ;*/
/* retain _dt_ _cnt_;*/
if first.group_id then call missing(of _dt_[*], of _cnt_[*]);
_k=mod(_n_,7)+1;
_dt_[_k] =date;
_cnt_[_k] =cnt_1day;
do _i=1 to dim(_dt_);
if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
end;
output;
call missing(of cnt_:);
drop _k _i;
run;
proc sql;
select
group_id,
max(cnt_1day) as cnt_1day,
max(cnt_2day) as cnt_2day,
max(cnt_7day) as cnt_7day
from want
group by group_id
;
quit;
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.