Your solution doesn't deal with junk text, but I like how you thought about issues caused by hyphens. I've added that flexibility to my solution. Also, the WORDS format omits 'and' in strings like 'one hundred and one', which SAS writes as 'one hundred one'; I've now catered for this as well. Lastly, translate() is better suited than tranwrd() for replacing single characters.
data NUMBERS; * create translation table;
do N=1 to 1000;
C=translate(put(N,words32.),' ','-');
output;
end;
run;
data RANDOMTEXT; * create sample data, add junk text ;
  /* C comes from NUMBERS, where it is built from a words32. value, so it is only
     32 characters wide. Widen it before SET: when the receiving variable is too
     short, CATX blanks the result and sets _ERROR_ instead of truncating, which
     would wipe out every row whose number words plus " years ago" exceed 32. */
  length C $50;
  set NUMBERS(keep=C);
  * append the junk text after the number words, separated by a single blank ;
  C=catx(' ',C,' years ago');
  * subsetting IF keeps a random sample of roughly one row in ten ;
  if ranuni(0) lt .1;
run;
data TEXT2DIGITS(keep= C C1); * replace data in sample and substitute letters for numbers;
  /* Purpose: for each row of RANDOMTEXT, find the largest number spelled out in C
     (using the NUMBERS lookup table) and write the text with that number as digits to C1.
     DSID : handle of the NUMBERS table, opened once on the first iteration.
     NOBS : row count of NUMBERS, so the search range follows the table size. */
  retain DSID NOBS;
  if _N_=1 then do;
    DSID=open('NUMBERS','i');
    if DSID then NOBS=attrn(DSID,'NLOBS');
    else do;
      /* Lookup table unavailable: report it and pass the text through unchanged. */
      NOBS=0;
      MSG=sysmsg();
      put 'ERROR: could not open NUMBERS, no numbers will be substituted. ' MSG;
    end;
  end;
  /* Deliberately placed before SET: LASTOBS still holds the value set while reading
     the previous row, so the close runs on the pass after the final row, just
     before SET finds no more data and ends the step. */
  if LASTOBS and DSID then RC=close(DSID);
  set RANDOMTEXT end=LASTOBS;
  length C1 REPLACED $200;
  /* Normalise the text to the layout of the lookup values: hyphens become blanks,
     the stand-alone word "and" is dropped, runs of blanks are collapsed.
     The target is " and " with its surrounding blanks so that words merely
     containing those letters (for example "thousand") are left intact. */
  C1=compbl(tranwrd(translate(C,' ','-'),' and ',' '));
  /* Search from the largest number down so that "thirty one" is tried before "one". */
  do OBS=NOBS to 1 by -1;
    RC=fetchobs(DSID,OBS);
    NUM=getvarc(DSID,2);           /* variable 2 of NUMBERS: the number in words */
    /* FIND is a cheap case-insensitive pre-filter; the regex is only built on a hit. */
    if find(C1,NUM,'it') then do;
      /* Replace every whole-word, case-insensitive occurrence with the digits taken
         from variable 1 of NUMBERS. CATS trims NUM and formats the number without padding. */
      REPLACED=prxchange(cats('s/\b',NUM,'\b/',getvarn(DSID,1),'/i'),-1,C1);
      /* An unchanged result means the hit was only part of a longer word
         (for example "one" in "someone"), so keep searching smaller numbers. */
      if REPLACED ne C1 then do;
        C1=REPLACED;
        leave;
      end;
    end;
  end;
  /* No explicit OUTPUT: each row is written once by the implicit output,
     whether or not a number was found. */
run;
... View more