BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
sashelppls
Calcite | Level 5

Suppose I have a dataset as follows with two variables, one group_id variable and a date variable:

 

 

data id_dates;
input group_id $ date :mmddyy10.;
format date mmddyy10.;
datalines;
2A 01/04/2020 
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020 
2A 04/07/2020 
2A 04/10/2020
2A 04/10/2020 
2A 04/10/2020 
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018 
2B 01/23/2019
2B 12/18/2018 
2B 12/23/2018
2B 12/19/2018 
2B 08/05/2018
2B 11/19/2018 
2B 01/01/2019
;

 

I want to find the max number of records within any 1 day period, 2 day period, and any 7 day period, given the dates are from within 60 days from the latest date, grouped by the group_id. My output from the example dataset should be:

 

group_id 1_day_max 2_day_max 1_week_max max_date
2A       3         3         4          05/10/2020
2B       4         5         6          01/24/2019

 

 I can get the "max_date" appended, but I'm having trouble with the 2 day/1 week max variables. For finding the 1 day max, I did it in a few steps I believe but there might be a cleaner way if anyone has input or notices any issues.

 

proc sql;
create table max_dates as select group_id, max(date) format mmddyy10. as max_date from id_dates group by group_id;
create table by_date as select a.group_id,count(a.date) as num from id_dates a join max_dates b on a.group_id=b.group_id where date > intnx('day', max_date, -60)  group by a.group_id, date;
create table max as select group_id,max(num) as max_num from by_date group by group_id;
quit;

 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Patrick
Opal | Level 21

Below what I could come-up with.

data have;
  input group_id $ date :mmddyy10.;
  format date date9.;
  datalines;
2A 01/04/2020 
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020 
2A 04/07/2020 
2A 04/10/2020
2A 04/10/2020 
2A 04/10/2020 
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018 
2B 01/23/2019
2B 12/18/2018 
2B 12/23/2018
2B 12/19/2018 
2B 08/05/2018
2B 11/19/2018 
2B 01/01/2019
;

proc sql;
  create table inter as
  select 
    l.group_id,
    l.date,
    r.max_date,
    l.cnt_1day
  from
  (
    select 
      group_id,
      date,
      count(*) as cnt_1day
    from have
    group by group_id, date
  ) l
  inner join
  (
    select 
      group_id,
      max(date) as max_date format=date9.
    from have
    group by group_id
  ) r
  on 
  l.group_id=r.group_id and r.max_date-l.date between 0 and 60
  order by group_id, date
  ;
quit;

data want;
  set inter;
  by group_id date;
  
  array _dt_  {7} 8 _temporary_;
  array _cnt_ {7} 8 _temporary_;

/*  array _dt_  {7} 8 ;*/
/*  array _cnt_ {7} 8 ;*/
/*  retain _dt_ _cnt_;*/

  if first.group_id then call missing(of _dt_[*], of _cnt_[*]);

  _k=mod(_n_,7)+1;
  _dt_[_k]  =date;
  _cnt_[_k] =cnt_1day;

  do _i=1 to dim(_dt_);
    if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
    if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
  end;

  output;
  call missing(of cnt_:);
  drop _k _i;

run;

proc sql;
  select
    group_id,
    max(cnt_1day) as cnt_1day,
    max(cnt_2day) as cnt_2day,
    max(cnt_7day) as cnt_7day 
  from want
  group by group_id
  ;
quit;

Patrick_0-1635943267433.png

 

View solution in original post

1 REPLY 1
Patrick
Opal | Level 21

Below what I could come-up with.

data have;
  input group_id $ date :mmddyy10.;
  format date date9.;
  datalines;
2A 01/04/2020 
2A 02/04/2020
2A 05/10/2020
2A 01/04/2020 
2A 04/07/2020 
2A 04/10/2020
2A 04/10/2020 
2A 04/10/2020 
2A 04/16/2020
2B 12/19/2018
2B 12/19/2018
2B 12/19/2018 
2B 01/23/2019
2B 12/18/2018 
2B 12/23/2018
2B 12/19/2018 
2B 08/05/2018
2B 11/19/2018 
2B 01/01/2019
;

proc sql;
  create table inter as
  select 
    l.group_id,
    l.date,
    r.max_date,
    l.cnt_1day
  from
  (
    select 
      group_id,
      date,
      count(*) as cnt_1day
    from have
    group by group_id, date
  ) l
  inner join
  (
    select 
      group_id,
      max(date) as max_date format=date9.
    from have
    group by group_id
  ) r
  on 
  l.group_id=r.group_id and r.max_date-l.date between 0 and 60
  order by group_id, date
  ;
quit;

data want;
  set inter;
  by group_id date;
  
  array _dt_  {7} 8 _temporary_;
  array _cnt_ {7} 8 _temporary_;

/*  array _dt_  {7} 8 ;*/
/*  array _cnt_ {7} 8 ;*/
/*  retain _dt_ _cnt_;*/

  if first.group_id then call missing(of _dt_[*], of _cnt_[*]);

  _k=mod(_n_,7)+1;
  _dt_[_k]  =date;
  _cnt_[_k] =cnt_1day;

  do _i=1 to dim(_dt_);
    if 0<=date-_dt_[_i]<=1 then cnt_2day+_cnt_[_i];
    if 0<=date-_dt_[_i]<=6 then cnt_7day+_cnt_[_i];
  end;

  output;
  call missing(of cnt_:);
  drop _k _i;

run;

proc sql;
  select
    group_id,
    max(cnt_1day) as cnt_1day,
    max(cnt_2day) as cnt_2day,
    max(cnt_7day) as cnt_7day 
  from want
  group by group_id
  ;
quit;

Patrick_0-1635943267433.png

 

hackathon24-white-horiz.png

2025 SAS Hackathon: There is still time!

Good news: We've extended SAS Hackathon registration until Sept. 12, so you still have time to be part of our biggest event yet – our five-year anniversary!

Register Now

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 1 reply
  • 947 views
  • 0 likes
  • 2 in conversation