Here is a sample.
data namelist;
  /* Lookup list of names: one name per record, read as a 10-character value. */
  input name $10.;
  datalines;
Tom
John
Frank
Anna
;
run;
data have;
  /* Sentences to be classified: each record is read whole (up to 40 characters) into STRINGS. */
  input strings $40.;
  datalines;
Tom has a cat
John has a dog
Anna has a parrot
Frank has a dog
Anna has a cow
John has a tiger
Tom AB has a tiger
Tom A41 has a lion
Tom B12 has a dolphin
Tom 2 has a horse
Tom-32 has a dog
;
run;
data _NULL_;
  /* Purpose: load NAMELIST into a lookup hash (nl), then scan HAVE, take the
     first word of each sentence as a candidate name, and for every row whose
     name is found in nl and whose text does NOT match the exclusion pattern,
     update a per-id entry in hash h. Finally h is written to WORK.WANT with
     STRINGS and ID dropped (so NAME and COUNT remain). */

  /* Host variables must exist in the PDV before the hash definitions bind to them. */
length id 8 name $10 strings $40 count 8;
  /* Explicitly initialise the host variables to missing. */
call missing(name, strings, count);

  /* h: keyed by ID, carrying ID, STRINGS, NAME and COUNT as data.
     suminc:"count" makes COUNT the increment variable for the key summary. */
declare hash h(suminc:"count", multidata:"y", ordered:"y");
rc=h.definekey("id");
rc=h.definedata("id", "strings", "name", "count");
rc=h.definedone();

  /* nl: lookup keyed by NAME, returning NAME and ID.
     NOTE(review): suminc/multidata are also set on nl, but nothing in this
     step calls nl.sum() - they look unnecessary here; confirm before removing. */
declare hash nl(suminc:"count", multidata:"y", ordered:"y");
rc=nl.definekey("name");
rc=nl.definedata("name", "id");
rc=nl.definedone();

  /* Pass 1: read every row of NAMELIST and store it with a running sequence
     number (the sum statement id+1 increments ID on each row read). */
do until(aaa);
set namelist end=aaa;
id+1;
nl.add(key:name, data:name, data:id);
end;

  /* Pass 2: read every row of HAVE. */
do until(eof);
set have end=eof;
  /* Increment used by the suminc mechanism of h for this row. */
count=1;
  /* Candidate name = first blank-delimited word of the sentence. */
name=scan(strings, 1, " ");
  /* Look the candidate up in nl using the current NAME as key; on success
     (rc=0) the data variables NAME and ID are loaded from nl. */
rc=nl.find();
  /* Keep the row only if the name is known AND the sentence does not match the
     case-insensitive pattern, whose alternatives are:
       - an optional "Tom" immediately followed by A or B and one or more digits
       - "AB" optionally followed by a hyphen
       - a space followed by a digit
     NOTE(review): the pattern is written in substitution form (s/.../match/i)
     although only the match position is tested - confirm PRXMATCH accepts this
     form as intended; a plain /.../i match expression would be the usual spelling. */
if rc=0 and prxmatch("s/(Tom)?(A|B)\d+|(AB)-?|( \d)/match/i", strings)=0 then
do;
  /* ref(): reference the entry for the current ID in h (documented as find-or-add). */
h.ref();
  /* sum(): copy the key summary for the current ID into COUNT. */
h.sum(sum:count);
  /* replace(): write the current STRINGS, NAME and the retrieved COUNT back into h.
     NOTE(review): the final COUNT depends on how REF and REPLACE interact with
     suminc - verify against the hash object documentation before reordering. */
h.replace();
end;
end;

  /* eof is always true once the loop above has finished, so this condition is
     effectively unconditional. The output data set keeps NAME and COUNT only.
     NOTE(review): there is no STOP statement; presumably the step ends when the
     next implicit iteration tries to read past the end of NAMELIST - confirm. */
if eof then
h.output(dataset:"work.want(drop=strings id)");
run;
... View more