Sure, here is the full code snippet: data want; set have; /*CLEAN UP DATE VARIABLES - ALL DIFFERENT FORMATS, PUNTUATION, ETC - THERE IS BASICALLY NO STANDARDIZATION*/ array _dtvar_sa {*} request_dt_d1 start_dt_d1 final_consult_dt_d1 dt_last_collect_d1; array _yrvar_sa {*} request_yr start_yr final_consult_year year_last_collect; array _yrvar_sa2 {*} request_yr2 start_yr final_consult_year_2 year_last_collect_2; array _yrvar_sa3 {*} request_yr3 start_yr final_consult_year_3 year_last_collect_3; array _mthvar_sa {*} request_mth start_mth final_consult_mth mth_last_collect; array _mthvar_sa2 {*} request_mth2 start_mth final_consult_mth_2 mth_last_collect_2; array _dayvar_sa {*} request_day start_day final_consult_day day_last_collect; array _dayvar_sa2 {*} request_day2 start_day2 final_consult_day_2 day_last_collect_2; array _cleandate_sa {*} request_dt_sa start_dt_sa final_consult_dt_sa dt_last_collect_sa; array _cleandate_all {*} request_dt start_dt final_consult_dt dt_last_collect; format request_dt start_dt final_consult_dt_ dt_last_collect ddmmyy10.; /*FIRST, LET US SET VALUES OF N/A, MISSING, OR OTHER TEXT VALUES TO A NONSENSE DATE SO IT FITS INTO THE COLUMN BUT WILL*/ /*NOT BE CONFUSED AS HAVING A REAL VALUE*/ do i = 1 to dim(_dtvar_sa); if _dtvar_sa[i] = ' ' then _cleandate_all[i] = '01JAN1900'd; else if _dtvar_sa[i] in ('N/A', 'NA') then _cleandate_all[i]='01JAN1901'd; else if anyalpha(compress(_dtvar_sa[i], ' /.'))>0 then _cleandate_all[i]='01JAN1902'd; /*FOR TEXT DATE VALUES:*/ /*SPLIT DATE VALUES INTO MONTH, DAY, YEAR. ASSUME FIRST SET OF NUMBERS IS DAY UNLESS OBVIOUS OTHEWISE*/ else do; /*IF VALUE IS SPLIT INTO SEGMENTS*/ if index(_dtvar_sa[i], '.') or index(_dtvar_sa[i], '/') or index(_dtvar_sa[i], '-') then do; /*IF THERE ARE 3 SEGMENTS, SPLIT INTO DAY/MONTH/YEAR*/ if scan(_dtvar_sa[i], 3, './- ') ne ' ' then do; /*ASSUME THE FIRST SET OF NUMBERS IS THE DAY*/ if scan(_dtvar_sa[i], 1, ' /.-') in ('01','1', '02','2', '03','3', '04','4', '05','5', '06','6', '07','7', '08','8', '09','9', '10','11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31') then do; _dayvar_sa[i] = scan(left(strip(_dtvar_sa[i])), 1, ' / .-'); end; _dayvar_sa2[i] = input(_dayvar_sa[i], 3.); /*ASSUME THE SECOND SET OF NUMBERS IS THE MONTH UNLESS IT IS >12*/ if scan(_dtvar_sa[i], 2, ' /.-') in ('01','1', '02','2', '03','3', '04','4', '05','5', '06','6', '07','7', '08','8', '09','9', '10','11', '12') then do; _mthvar_sa[i] = scan(left(strip(_dtvar_sa[i])), 2, ' / .-'); end; _mthvar_sa2[i] = input(_mthvar_sa[i], 3.); /*ASSUME THE THIRD SET OF NUMBERS IS THE YEAR*/ _yrvar_sa[i]=scan(_dtvar_sa[i], 3, '/- .'); if _yrvar_sa[i] = '11' then _yrvar_sa2[i]='2011'; else if _yrvar_sa[i] = '12' then _yrvar_sa2[i]='2012'; else if _yrvar_sa[i] = '13' then _yrvar_sa2[i]='2013'; else if _yrvar_sa[i] = '14' then _yrvar_sa2[i]='2014'; else if _yrvar_sa[i] = '15' then _yrvar_sa2[i]='2015'; else if _yrvar_sa[i] = '16' then _yrvar_sa2[i]='2016'; else if _yrvar_sa[i] = '17' then _yrvar_sa2[i]='2017'; else if _yrvar_sa[i] = '18' then _yrvar_sa2[i]='2018'; else if _yrvar_sa[i] = '19' then _yrvar_sa2[i]='2019'; else if _yrvar_sa[i] = '20' then _yrvar_sa2[i]='2020'; else if _yrvar_sa[i] = '21' then _yrvar_sa2[i]='2021'; else if _yrvar_sa[i] = '22' then _yrvar_sa2[i] = '2022'; else if _yrvar_sa[i] ne ' ' then _yrvar_sa2[i] = _yrvar_sa[i]; end;end; /*SOME VALUES JUST HAVE A YEAR AND NOTHING ELSE*/ if _dtvar_sa[i] = '2021' then _yrvar_sa2[i] = '2021'; else if _dtvar_sa[i] = '2022' then _yrvar_sa2[i]='2022'; _yrvar_sa3[i] = input(_yrvar_sa2[i], 4.); end;end; run; Raw date var: Desired date var 1/1/1900 N/A 1/1/1901 RANDOM TEXT 1/1/1902 5/3/22 05/03/2022 24-02-2021 24/02/2021
... View more