BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
JacAder
Obsidian | Level 7

Given the following variables
table 1: ID, StartDate, Frequency
table 2: ID, TargetDate
To compute
table 2: RollingStartDate, MonthCount, CalendarDayCount

 

1) for each ID, the VERY FIRST 'RollingStartDate' in table 2 equals to 'StartDate' in table 1;
    as moving forward, the subsequent "RollingStartDate" is updated according to the Frequency in table 1.
    eg., for id1, the very first 'RollingStartDate' is 9/30/2018 (the same as that initially have in table 1);

           for id2,  the very first 'RollingStartDate' is 11/1/2017(the same as that initially have in table 1);

           as moving forward:
           1.a) if 'Frequency' in table 1 equals 6, then for every 6 months, reset the 'RollingStartDate' as the last day of the 6th month;
                   eg., fo id1, after 6 month, the RollingStartDate will be reset as 3/31/2019.
            1.b) if 'Frequency' in table 1 equals 3, then for every 3 months, reset the 'StartDate' as the last day of the 3th month;
                   eg., fo id3, after 3 month, the RollingStartDate will be reset as 10/31/2016.


2) 'MonthCount' is the number of months between the 'TargetDate' and 'RollingStartDate';
     eg., the number in E16 indicates that there are 3 months between 12/31/2018 and 9/30/2018.

 

3) 'CalendarDayCount' is the number of calendar days between the 'TargetDate' and 'RollingStartDate';
     eg., the number in F16 indicates that there are 92 calendar days between 12/31/2018 and 9/30/2018.

 

Some thoughts: loop for each id, first generate a list of "RollingStartDate" using Table 1 information; then generate a list of "TargetDate" with all missing dates are filled; finally, compare above two lists (not sure whether this is a good way).

 

table1.png

table2.png

 

data table1;
informat ID $3. StartDate mmddyy10. Frequency 2.;
format  StartDate mmddyy10.;
input ID StartDate Frequency;
cards;
id1	9/30/2018	6
id2	11/1/2017	6
id3	8/1/2016	3
id4	10/31/2015	12
;
run;

data table2;
informat ID $3. TargetDate mmddyy10.;
format  TargetDate mmddyy10.;
input ID TargetDate;
cards;
id1	7/31/2018
id1	8/31/2018
id1	9/30/2018
id1	10/31/2018
id1	11/30/2018
id1	12/31/2018
id1	1/31/2019
id1	2/28/2019
id1	3/31/2019
id1	5/31/2019
id1	6/30/2019
id1	7/31/2019
id1	8/31/2019
id1	10/31/2019
id1	11/30/2019
id1	12/31/2019
id1	1/31/2020
id1	3/31/2020
id1	6/30/2020
id2	7/31/2017
id2	8/31/2017
id2	9/30/2017
id2	10/31/2017
id2	12/31/2017
id2	1/31/2018
id2	2/28/2018
id2	3/31/2018
id2	4/30/2018
id2	5/31/2018
id2	6/30/2018
id2	7/31/2018
id2	8/31/2018
id2	9/30/2018
id2	10/31/2018
id2	11/30/2018
id2	12/31/2018
id2	2/28/2019
id2	3/31/2019
id2	5/31/2019
id2	6/30/2019
id3	7/31/2016
id3	8/31/2016
id3	9/30/2016
id3	10/31/2016
id3	11/30/2016
id3	12/31/2016
id3	1/31/2017
id3	2/28/2017
id3	3/31/2017
id3	5/31/2017
id3	6/30/2017
id3	7/31/2017
id3	8/31/2017
id3	9/30/2017
id3	10/31/2017
id3	11/30/2017
id3	2/28/2018
id3	3/31/2018
id3	4/30/2018
id3	5/31/2018
id3	6/30/2018
id4	7/31/2015
id4	8/31/2015
id4	9/30/2015
id4	11/30/2015
id4	12/31/2015
id4	1/31/2016
id4	2/29/2016
id4	3/31/2016
id4	4/30/2016
id4	5/31/2016
id4	6/30/2016
id4	7/31/2016
id4	8/31/2016
id4	9/30/2016
id4	11/30/2016
id4	12/31/2016
id4	1/31/2017
id4	2/28/2017
id4	4/30/2017
id4	5/31/2017
id4	6/30/2017
;
run;
1 ACCEPTED SOLUTION

Accepted Solutions
Ksharp
Super User
data table1;
informat ID $3. StartDate mmddyy10. Frequency 2.;
format  StartDate mmddyy10.;
input ID StartDate Frequency;
cards;
id1	9/30/2018	6
id2	11/1/2017	6
id3	8/1/2016	3
id4	10/31/2015	12
;
run;

data table2;
informat ID $3. TargetDate mmddyy10.;
format  TargetDate mmddyy10.;
input ID TargetDate;
cards;
id1	7/31/2018
id1	8/31/2018
id1	9/30/2018
id1	10/31/2018
id1	11/30/2018
id1	12/31/2018
id1	1/31/2019
id1	2/28/2019
id1	3/31/2019
id1	5/31/2019
id1	6/30/2019
id1	7/31/2019
id1	8/31/2019
id1	10/31/2019
id1	11/30/2019
id1	12/31/2019
id1	1/31/2020
id1	3/31/2020
id1	6/30/2020
id2	7/31/2017
id2	8/31/2017
id2	9/30/2017
id2	10/31/2017
id2	12/31/2017
id2	1/31/2018
id2	2/28/2018
id2	3/31/2018
id2	4/30/2018
id2	5/31/2018
id2	6/30/2018
id2	7/31/2018
id2	8/31/2018
id2	9/30/2018
id2	10/31/2018
id2	11/30/2018
id2	12/31/2018
id2	2/28/2019
id2	3/31/2019
id2	5/31/2019
id2	6/30/2019
id3	7/31/2016
id3	8/31/2016
id3	9/30/2016
id3	10/31/2016
id3	11/30/2016
id3	12/31/2016
id3	1/31/2017
id3	2/28/2017
id3	3/31/2017
id3	5/31/2017
id3	6/30/2017
id3	7/31/2017
id3	8/31/2017
id3	9/30/2017
id3	10/31/2017
id3	11/30/2017
id3	2/28/2018
id3	3/31/2018
id3	4/30/2018
id3	5/31/2018
id3	6/30/2018
id4	7/31/2015
id4	8/31/2015
id4	9/30/2015
id4	11/30/2015
id4	12/31/2015
id4	1/31/2016
id4	2/29/2016
id4	3/31/2016
id4	4/30/2016
id4	5/31/2016
id4	6/30/2016
id4	7/31/2016
id4	8/31/2016
id4	9/30/2016
id4	11/30/2016
id4	12/31/2016
id4	1/31/2017
id4	2/28/2017
id4	4/30/2017
id4	5/31/2017
id4	6/30/2017
;
run;

proc sql;
create table temp as
select 	a.*,max_date format=mmddyy10.
 from table1 as a left join 
(select id,max(TargetDate) as max_date from table2 group by id)	as b
on a.id=b.id;
quit;
data temp1;
 set temp;
 do i=0 to intck('month',startdate,max_date,'c');
   start=.;
   date=intnx('month',startdate,i,'e');
   if mod(i,Frequency)=0 then start=1;
   output;
 end;
 keep id date start;
 format date mmddyy10.;
run;
data temp2;
 merge table2 temp1(rename=(date=targetdate) );
 by id targetdate;
 if start then rollingstartdate=targetdate;
 format rollingstartdate mmddyy10.;
 run;
data temp3;
 merge temp2 table1	;
 by id;
 monthcount=mod(intck('month',startdate,targetdate,'c'),frequency);
 if first.id then group=0;
 if not  first.id  and  lag(start)=1 and start=. then group+1;
 if group>0 and monthcount=0 then monthcount=frequency;
run;
data temp4;
 set temp3;
 if group=0 then group=1;
 lag_rollingstartdate=lag(rollingstartdate);
 drop start  frequency 	startdate;
 format  lag_rollingstartdate mmddyy10.;
run;

proc sql;
create table want as
 select *,case when group=1 then targetdate-min(rollingstartdate)
          else targetdate-min(lag_rollingstartdate) end as CalendarDayCount
  from temp4
   group by id,group
   order by id ,targetdate;
quit;

View solution in original post

9 REPLIES 9
novinosrin
Tourmaline | Level 20

Hi @JacAder  Your Table1 should have Month end dates to match the targetdate in table2, that causes a confusion where some dates like 11/1 and 8/1. Those have to be made consistent otherwise it;s gonna be a bit of boring verbose to standardize those as well. 

If you want, make them all month end : intnx('mon',date,0','e') if it aligns with business logic

 

Other than that, pretty straight forward


data table1;
informat ID $3. StartDate mmddyy10. Frequency 2.;
format  StartDate mmddyy10.;
input ID StartDate Frequency;
cards;
id1	9/30/2018	6
id2	11/1/2017	6
id3	8/1/2016	3
id4	10/31/2015	12
;
run;

data table2;
infile cards truncover;
informat ID $3. TargetDate mmddyy10.;
format  TargetDate mmddyy10.;
input ID TargetDate;
cards;
id1	7/31/2018
id1	8/31/2018
id1	9/30/2018
id1	10/31/2018
id1	11/30/2018
id1	12/31/2018
id1	1/31/2019
id1	2/28/2019
id1	3/31/2019
id1	4/30/2019
id1	5/31/2019
id1	6/30/2019
id1	7/31/2019
id1	8/31/2019
id1	9/30/2019
id1	10/31/2019
id1	11/30/2019
id1	12/31/2019
id1	1/31/2020
id1	. 
id1	. 
id1	. 
id1	. 
id1	6/30/2020
id2	7/31/2017
id2	8/31/2017
id2	9/30/2017
id2	10/31/2017
id2	. 
id2	12/31/2017
id2	1/31/2018
id2	2/28/2018
id2	3/31/2018
id2	4/30/2018
id2	5/31/2018
id2	6/30/2018
id2	7/31/2018
id2	8/31/2018
id2	9/30/2018
id2	10/31/2018
id2	11/30/2018
id2	12/31/2018
id2	. 
id2	2/28/2019
id2	3/31/2019
id2	. 
id2	5/31/2019
id2	6/30/2019
id3	7/31/2016
id3	8/31/2016
id3	9/30/2016
id3	10/31/2016
id3	11/30/2016
id3	12/31/2016
id3	1/31/2017
id3	2/28/2017
id3	3/31/2017
id3	. 
id3	5/31/2017
id3	6/30/2017
id3	7/31/2017
id3	8/31/2017
id3	9/30/2017
id3	10/31/2017
id3	11/30/2017
id3	. 
id3	. 
id3	2/28/2018
id3	3/31/2018
id3	4/30/2018
id3	5/31/2018
id3	6/30/2018
id4	7/31/2015
id4	8/31/2015
id4	9/30/2015
id4	. 
id4	11/30/2015
id4	12/31/2015
id4	1/31/2016
id4	2/29/2016
id4	3/31/2016
id4	4/30/2016
id4	5/31/2016
id4	6/30/2016
id4	7/31/2016
id4	8/31/2016
id4	9/30/2016
id4	. 
id4	11/30/2016
id4	12/31/2016
id4	1/31/2017
id4	2/28/2017
id4	. 
id4	4/30/2017
id4	5/31/2017
id4	6/30/2017
;
run;

data want;
 do until(last.id);
  merge table2 table1;
  by id;
  if not missing(targetdate) then _t=targetdate;
  else do;targetdate=intnx('mon',_t,1,'e');_t=targetdate;end;
  if targetdate>=startdate then do;
    if startdate=targetdate then rollingstartdate=targetdate;
  month_count=intck('mon',rollingstartdate,targetdate);
  calendardaycount=intck('day',rollingstartdate,targetdate);
 end;
  else if targetdate<startdate then do;
   month_count=intck('mon',startdate,targetdate);
   calendardaycount=intck('day',startdate,targetdate);
  end;
  output;
 if month_count=frequency then rollingstartdate=targetdate;
end;
format rollingstartdate mmddyy10.;
drop _: frequency startdate;
run;

 

JacAder
Obsidian | Level 7

@novinosrin very grateful as always for your kindly assistance. somehow the code does not go through all the id when I tried on my computer. I will try to check what makes the error. 

Ksharp
Super User
data table1;
informat ID $3. StartDate mmddyy10. Frequency 2.;
format  StartDate mmddyy10.;
input ID StartDate Frequency;
cards;
id1	9/30/2018	6
id2	11/1/2017	6
id3	8/1/2016	3
id4	10/31/2015	12
;
run;

data table2;
informat ID $3. TargetDate mmddyy10.;
format  TargetDate mmddyy10.;
input ID TargetDate;
cards;
id1	7/31/2018
id1	8/31/2018
id1	9/30/2018
id1	10/31/2018
id1	11/30/2018
id1	12/31/2018
id1	1/31/2019
id1	2/28/2019
id1	3/31/2019
id1	5/31/2019
id1	6/30/2019
id1	7/31/2019
id1	8/31/2019
id1	10/31/2019
id1	11/30/2019
id1	12/31/2019
id1	1/31/2020
id1	3/31/2020
id1	6/30/2020
id2	7/31/2017
id2	8/31/2017
id2	9/30/2017
id2	10/31/2017
id2	12/31/2017
id2	1/31/2018
id2	2/28/2018
id2	3/31/2018
id2	4/30/2018
id2	5/31/2018
id2	6/30/2018
id2	7/31/2018
id2	8/31/2018
id2	9/30/2018
id2	10/31/2018
id2	11/30/2018
id2	12/31/2018
id2	2/28/2019
id2	3/31/2019
id2	5/31/2019
id2	6/30/2019
id3	7/31/2016
id3	8/31/2016
id3	9/30/2016
id3	10/31/2016
id3	11/30/2016
id3	12/31/2016
id3	1/31/2017
id3	2/28/2017
id3	3/31/2017
id3	5/31/2017
id3	6/30/2017
id3	7/31/2017
id3	8/31/2017
id3	9/30/2017
id3	10/31/2017
id3	11/30/2017
id3	2/28/2018
id3	3/31/2018
id3	4/30/2018
id3	5/31/2018
id3	6/30/2018
id4	7/31/2015
id4	8/31/2015
id4	9/30/2015
id4	11/30/2015
id4	12/31/2015
id4	1/31/2016
id4	2/29/2016
id4	3/31/2016
id4	4/30/2016
id4	5/31/2016
id4	6/30/2016
id4	7/31/2016
id4	8/31/2016
id4	9/30/2016
id4	11/30/2016
id4	12/31/2016
id4	1/31/2017
id4	2/28/2017
id4	4/30/2017
id4	5/31/2017
id4	6/30/2017
;
run;

proc sql;
create table temp as
select 	a.*,max_date format=mmddyy10.
 from table1 as a left join 
(select id,max(TargetDate) as max_date from table2 group by id)	as b
on a.id=b.id;
quit;
data temp1;
 set temp;
 do i=0 to intck('month',startdate,max_date,'c');
   start=.;
   date=intnx('month',startdate,i,'e');
   if mod(i,Frequency)=0 then start=1;
   output;
 end;
 keep id date start;
 format date mmddyy10.;
run;
data temp2;
 merge table2 temp1(rename=(date=targetdate) );
 by id targetdate;
 if start then rollingstartdate=targetdate;
 format rollingstartdate mmddyy10.;
 run;
data temp3;
 merge temp2 table1	;
 by id;
 monthcount=mod(intck('month',startdate,targetdate,'c'),frequency);
 if first.id then group=0;
 if not  first.id  and  lag(start)=1 and start=. then group+1;
 if group>0 and monthcount=0 then monthcount=frequency;
run;
data temp4;
 set temp3;
 if group=0 then group=1;
 lag_rollingstartdate=lag(rollingstartdate);
 drop start  frequency 	startdate;
 format  lag_rollingstartdate mmddyy10.;
run;

proc sql;
create table want as
 select *,case when group=1 then targetdate-min(rollingstartdate)
          else targetdate-min(lag_rollingstartdate) end as CalendarDayCount
  from temp4
   group by id,group
   order by id ,targetdate;
quit;
JacAder
Obsidian | Level 7

@Ksharp so thankful for your great efforts! the code works well except that results for id2 and id3 are misaligned. Because StartDate of id2 and id3 are the beginning of month, rather than the end of month like those for id1 and id4, that brings in the chaos.  As @novinosrin pointed out, if possible, I should convert all StartDate to the end of the month. Inspired by both of your professional inputs, StartDate in table 1 such as 11/1/2017 and 8/1/2016 should be converted to the last day of lag one month, eg., 11/1/2017 is converted to 10/31/2017, and 8/1/2016 is converted to 7/31/2016 (one day difference is acceptable in my case). 

Ksharp
Super User

OK. Try this. notice the code I changed (annotation). You could change it as your wish .

But you should know "date=intnx('month',startdate,i,'s'); " would return the same day, i.e. 2-28-2018 --> 7-28-2018 .

So if possible , as you said turn all the date into the end day of month .

 


proc sql;
create table temp as
select 	a.*,max_date format=mmddyy10.
 from table1 as a left join 
(select id,max(TargetDate) as max_date from table2 group by id)	as b
on a.id=b.id;
quit;
data temp1;
 set temp;
 do i=0 to intck('month',startdate,max_date,'c');
   start=.;
   date=intnx('month',startdate,i,'s');	/* <------- */
   if mod(i,Frequency)=0 then start=1;
   output;
 end;
 keep id date start;
 format date mmddyy10.;
run;
data temp2;
 merge table2 temp1(rename=(date=targetdate) );
 by id targetdate ;   
  format rollingstartdate mmddyy10. ;	
 if start then rollingstartdate=targetdate;
 run;
data temp3;
 merge temp2 table1	;
 by id;
 monthcount=mod(intck('month',startdate,targetdate,'c'),frequency);
 if first.id then group=0;
 if not  first.id  and  lag(start)=1 and start=. then group+1;
 if group>0 and monthcount=0 then monthcount=frequency;
run;
data temp4;
 set temp3;
 if group=0 then group=1;
 lag_rollingstartdate=lag(rollingstartdate);
 drop start  frequency 	startdate;
 format  lag_rollingstartdate mmddyy10.;
run;

proc sql;
create table want as
 select *,case when group=1 then targetdate-min(rollingstartdate)
          else targetdate-min(lag_rollingstartdate) end as CalendarDayCount
  from temp4
   group by id,group
   order by id ,targetdate;
quit;

 

JacAder
Obsidian | Level 7

@Ksharp thousand thanks for your comprehensive solution! 

JacAder
Obsidian | Level 7

@Ksharp 

 

when running the following code:

data temp1;
 set temp;
 do i=0 to intck('month',startdate,max_date,'c');
   start=.;
   date=intnx('month',startdate,i,'e');
   if mod(i,Frequency)=0 then start=1;
   output;
 end;
 keep id date start;
 format date mmddyy10.;
run;

it reports the error:

 

ERROR: Invalid DO loop control information, either the INITIAL or TO expression is missing or
the BY expression is missing, zero, or invalid.
WARNING: The data set WORK.TEMP1 may be incomplete. When this step was stopped there were 0
observations and 3 variables.

 

any thoughts? thanks a lot!

Ksharp
Super User

There must be some MISSING value in startdate or in max_date.

 

Check these two variables via MISSING() function.

hackathon24-white-horiz.png

The 2025 SAS Hackathon has begun!

It's finally time to hack! Remember to visit the SAS Hacker's Hub regularly for news and updates.

Latest Updates

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 9 replies
  • 3376 views
  • 4 likes
  • 3 in conversation