Hi @dandsouza
Sorry, I forgot to copy a `;` before the first `run;` - this hopefully looks better:
/* Sample input: one row per run of a file (file name + run date). */
data have;
    /* Declare the character length up front so plain list input can be used for filename. */
    length filename $ 20;
    /* date is read with the DATE9. informat (e.g. 01Dec2021) and displayed as yyyy-mm-dd. */
    input filename $ date : date9.;
    format date yymmdd10.;
    cards;
IncomeCalc 01Dec2021
IncomeCalc 02Dec2021
IncomeCalc 03Dec2021
IncomeCalc 04Dec2021
IncomeCalc 05Dec2021
IncomeCalc 06Dec2021
IncomeCalc 07Dec2021
DonateCalc 07Dec2021
ExpenseCalc 15Dec2021
IncomeCalc 08Dec2021
IncomeCalc 09Dec2021
IncomeCalc 19Dec2021
IncomeCalc 11Dec2021
IncomeCalc 12Dec2021
IncomeCalc 13Dec2021
IncomeCalc 14Dec2021
DonateCalc 07Dec2021
ExpenseCalc 30Dec2021
;
run;
/* Step 1 - calculate the count of days since last run + the number of times a given filename reoccurs. */
/* Order the runs by file and then chronologically; HAVE is sorted in place. */
proc sort data=have out=have;
    by filename date;
run;
/* Split the sorted runs into two tables:
     interval - one row per run that has an earlier run of the same file,
                with the gap in days to that earlier run
     files    - one row per file, with the number of gaps found (runs minus one) */
data interval (keep=filename date interval)
     files    (keep=filename intervals_total);
    set have;
    by filename date;

    /* LAG has to execute on every row so that it always returns the previous row's date. */
    prev_date = lag(date);

    if first.filename then intervals_total = 0;   /* first run of a file: no gap yet */
    else do;
        intervals_total + 1;                      /* sum statement: value is retained across rows */
        interval = date - prev_date;
        output interval;
    end;

    if last.filename then output files;
run;
/* Step 2 - For each combination of filename / interval (days between consecutive runs), count the number of occurrences. */
proc sql;
    /* One row per (filename, interval) with the number of times that gap was seen.
       Rows are ordered so that, within a file, the most frequent gap comes last. */
    create table interval_count as
        select   i.filename,
                 i.interval,
                 count(*) as intervals_found
        from     interval as i
        group by i.filename, i.interval
        order by i.filename, intervals_found;
quit;
/* Step 3 - For each combination of filename / interval, calculate the percentage of that file's intervals it accounts for. */
/* Attach each file's total number of intervals to its per-interval counts and
   express every count as a percentage of that total. */
data want_intermediate;
    merge interval_count files;
    by filename;
    format pct_occurred 5.0;
    /* A file that ran only once has intervals_total = 0 and no row in INTERVAL_COUNT
       (intervals_found is missing). Leave pct_occurred missing for it instead of
       doing arithmetic on a missing value over a zero denominator. */
    if intervals_total > 0 and not missing(intervals_found) then
        pct_occurred = (intervals_found * 100) / intervals_total;
run;
/* Step 4 - For each filename, take the most frequently occurring interval and translate it to a scheduling period. */
/* Keep one row per file - the last one, which is the most frequent interval because
   the input is ordered by intervals_found within filename - and label it with a
   scheduling period. */
data want_final;
    set want_intermediate;
    by filename;
    length schedule $20;
    if last.filename then do;
        /* Ranges are in days between consecutive runs, with some slack around the
           nominal period. The quarterly upper bound was 33, which made the test
           86 < interval < 33 impossible to satisfy; quarters span 90-92 days. */
        if 86 < interval < 96 then schedule = 'Quarterly';
        else if 26 < interval < 33 then schedule = 'Monthly';
        else if 12 < interval < 16 then schedule = 'SecondWeek';
        else if interval = 7 then schedule = 'Weekly';
        else if interval = 1 then schedule = 'Daily';
        /* Also reached when interval is missing (file seen only once). */
        else schedule = 'Undetermined';
        output;
    end;
run;
... View more