@Ronein You could try below to avoid a) sorting and b) writing the whole sorted table to disk. I did ask how you created this big source table in work because if that's done with an earlier data step then you incorporate the creation of the hash lookup table into this data step instead of the data _null_ step below because this will avoid a full pass through the data.
/* create sample data */
data ACCOUNT_MONTHLY_DATA_RF_CS_12;
do i = 1 to 30;
UPDATE_DATE = '01JAN2025'd + i;
FK_APPLICATION = round(1000 + ranuni(0) * 100);
FK_MONTHLY_DATA_ACCOUNT = round(2000 + ranuni(0) * 100);
REFERENCE_DATE = '01JAN2025'd + intnx('month', 0, i);
output;
output;
end;
drop i;
run;
/* create table with row_number of source table that contains the max reference_date */
data _null_;
if _n_=1 then
do;
length row_num max_REFERENCE_DATE 8;
dcl hash h1();
h1.defineKey('UPDATE_DATE', 'FK_APPLICATION', 'FK_MONTHLY_DATA_ACCOUNT');
h1.defineData('row_num','max_REFERENCE_DATE');
h1.defineDone();
call missing(row_num, max_REFERENCE_DATE );
end;
set ACCOUNT_MONTHLY_DATA_RF_CS_12(keep=UPDATE_DATE FK_APPLICATION FK_MONTHLY_DATA_ACCOUNT REFERENCE_DATE) end=last;
if h1.find() = 0 then
do;
if REFERENCE_DATE>max_REFERENCE_DATE then
do;
rc=h1.replace(key:UPDATE_DATE, key:FK_APPLICATION , key:FK_MONTHLY_DATA_ACCOUNT , data:_n_ , data:REFERENCE_DATE);
end;
end;
else
do;
rc=h1.add(key:UPDATE_DATE, key:FK_APPLICATION , key:FK_MONTHLY_DATA_ACCOUNT , data:_n_ , data:REFERENCE_DATE);
end;
if last then h1.output(dataset:"work.row_num_with_max_REFERENCE_DATE(keep=row_num rename=(row_num=_n_))");
run;
data want;
if _n_=1 then
do;
dcl hash h1(dataset:'work.row_num_with_max_REFERENCE_DATE');
h1.defineKey('_n_');
h1.defineDone();
end;
set ACCOUNT_MONTHLY_DATA_RF_CS_12;
if h1.check()=0 then output;
run;
... View more