/* Step 1: create a vertical structure of the sample strings.            */
/* Each observation holds one free-text creatinine result in STR1; the   */
/* trailing comment on each line states what should be extracted.        */
data aatest;
  length str1 $200;
  str1 = 'Serum creatinine: 3160';      /* WANT: 3160 */
  output;
  str1 = 'Serum creatinine is 3160';    /* WANT: 3160 */
  output;
  str1 = '72(ref range 44-106)';        /* WANT: 72 and 44-106 */
  output;
  str1 = '133 H umol/l (49-93)';        /* WANT: 133 and 49-93 */
  output;
  str1 = '80(Ref. Int. 52-112 umol/L)'; /* WANT: 80 and 52-112 */
  output;
  str1 = 'TEST RESULT\.br\COLLECTION DATE 6-FEB-2014\.br\24 HOUR URINE VOLUME 0.100\.br\\.br\SERUM CREATININE LEVEL 511 HI 64 - 110'; /* WANT: 511 and 64-110 */
  output;
run;

/* Step 2: standardize the chaos.                                         */
/* Rewrites every STR1 as "<value> <range>" in OUT, then splits it into   */
/* VALUE (first integer) and RANGE (optional "low-high", blanks removed). */
/* Only integer values are captured; rows that do not fit the pattern get */
/* a log message and blank VALUE / RANGE.                                 */
data aaatest;
  length out $200;
  set aatest;

  /* Compile the substitution pattern once and keep its id across rows. */
  retain regId;
  if _n_ = 1 then
    /* Pattern, left to right:                                            */
    /*   ^(?:.*creatinine\D*)?  optional lead-in up to the LAST            */
    /*                          "creatinine" plus any non-digits after it  */
    /*   (\d+)                  $1 = the result value                      */
    /*   \D*                    any non-digit filler (units, flags, "(")   */
    /*   (\d+\s*-\s*\d+)?       $2 = optional reference range              */
    /*   .*                     swallow the rest so OUT holds only $1 $2   */
    /* \D* is used instead of ".[^\d]*": a mandatory "." can consume a     */
    /* digit of the value (e.g. 'creatinine 3160' -> 160) or force the     */
    /* value to give up its last digit when nothing follows it.            */
    regId = prxparse('s/^(?:.*creatinine\D*)?(\d+)\D*(\d+\s*-\s*\d+)?.*/$1 $2/i');

  match = prxmatch(regId, str1);
  out   = prxchange(regId, -1, str1);

  if match then do;
    /* OUT is "<value> <range>": first word is the value, the remainder   */
    /* (with embedded blanks such as "64 - 110" squeezed out) is the range. */
    value = scan(out, 1);
    range = compress(substr(out, find(out, ' ')));
  end;
  else
    /* On a non-match PRXCHANGE returns STR1 unchanged, so VALUE / RANGE   */
    /* are deliberately left blank instead of being parsed from raw text.  */
    /* 'E' 'RROR' is split so the echoed source does not trip log scans.   */
    put 'E' 'RROR: unrecognized string pattern, please check' str1=;
run;
/* tadaaa! */
... View more