Hi @Sathish_jammy
Here is an attempt to solve your problem:
/*
 * Build AAA2 from AAA by parsing the free-text variable Comments.
 *
 * Reads  : Comments (character), DOB (numeric SAS date) from AAA.
 * Creates: comments_year_detect  - last run of four digits found in Comments
 *          Cancer_onset_Date_y   - age in years  (character digits)
 *          Cancer_onset_Date_m   - extra months  (character digits)
 *          Cancer_onset_Date_d   - extra days    (character digits)
 *          Cancer_onset_Date     - DOB shifted by the parsed years/months/days
 *
 * All pattern matching is case insensitive (the i modifier).
 */
data aaa2;
    set aaa;
    format Cancer_onset_Date mmddyy10.;

    /* Year of detection: the leading greedy wildcard means that, when several
       four-digit runs are present, the LAST one in Comments is kept. */
    if prxmatch('/^(.*)(\d{4})(.*)$/i', Comments) then
        comments_year_detect = prxchange('s/(.*)(\d{4})(.*)/$2/', 1, Comments);

    /* Age in years.
       Assumption: the number is preceded by "detected at the age of".
       Accepted unit spellings: Years, Yrs, Year, Yr. */
    if prxmatch('/^.*detected at the age of\s+(\d{1,2})\s+(?:Years|Yrs|Year|Yr).*$/i', Comments) then
        Cancer_onset_Date_y = prxchange('s/^.*detected at the age of\s+(\d{1,2})\s+(?:Years|Yrs|Year|Yr).*$/$1/i', 1, Comments);

    /* Additional months: a 1-2 digit number between a year unit and a month unit.
       Accepted month spellings: Months, Mths, Month, Mth (same list the day
       pattern below expects in front of the day count). */
    if prxmatch('/^.*(?:Years|Yrs|Year|Yr)\s+(\d{1,2})\s+(?:Months|Mths|Month|Mth).*$/i', Comments) then
        Cancer_onset_Date_m = prxchange('s/^.*(?:Years|Yrs|Year|Yr)\s+(\d{1,2})\s+(?:Months|Mths|Month|Mth).*$/$1/i', 1, Comments);

    /* Additional days: a 1-2 digit number between a month unit and "Day". */
    if prxmatch('/^.*(?:Months|Mths|Month|Mth)\s+(\d{1,2})\s+Day.*$/i', Comments) then
        Cancer_onset_Date_d = prxchange('s/^.*(?:Months|Mths|Month|Mth)\s+(\d{1,2})\s+Day.*$/$1/i', 1, Comments);

    /* Explicit character-to-numeric conversion. The ?? modifier suppresses log
       notes and yields a missing value when the text is blank or not numeric. */
    _onset_years  = input(Cancer_onset_Date_y, ?? 8.);
    _onset_months = input(Cancer_onset_Date_m, ?? 8.);
    _onset_days   = input(Cancer_onset_Date_d, ?? 8.);

    /* Onset date = DOB + years (+ months) (+ days).
       With alignment "S" the month and day of DOB are carried over, so an
       unspecified month/day defaults to the one from DOB.
       INTNX is only called when both DOB and the age are known; otherwise the
       onset date is explicitly set to missing instead of triggering
       invalid-argument notes in the log. */
    if not missing(DOB) and not missing(_onset_years) then do;
        Cancer_onset_Date = intnx('year', DOB, _onset_years, 's');
        if not missing(_onset_months) then
            Cancer_onset_Date = intnx('month', Cancer_onset_Date, _onset_months, 's');
        if not missing(_onset_days) then
            Cancer_onset_Date = intnx('day', Cancer_onset_Date, _onset_days, 's');
    end;
    else Cancer_onset_Date = .;

    /* Work variables are not part of the output dataset. */
    drop _onset_years _onset_months _onset_days;
run;
... View more