Hi everyone
i am trying to convert the following code into macro, it basically breakes a text into sentences and then picks a sentence that has
keyword_1, and then extracts a number of words (5) before and after that keyword and check if any of these words match
keyword_2
so if you start with :
text text text text text text text text text text .text text text text text text text text text .text text text text text text text text text .text text text text text . GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED. text text text text text text text text text text text text text text text text text text text
if you use keywords "sentence" and "should", The first part of the code picks up:
text text . GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED
second part of the code will find the keyword "should" and return that keyword_2='present'
Here is the working code:
data want_1;
set have;
prxid=prxparse('/([^\.]|\.(?=\d))+\./o');
start=1;stop=length(textstring);
call prxnext(prxid,start,stop,textstring,position,length);
do while(position>0);
hh=substr(textstring,position,length);
if find(hh,'keyword_1','i') then output;
call prxnext(prxid,start,stop,textstring,position,length);
end;
drop start stop position length prxid;
run;
data want_2;
set want_1;
do i=1 to countw(hh," ");
if scan(hh,i," ")="keyword_1" then do;
beforei=scan(hh,i-1," ");
beforei2=scan(hh,i-2," ");
beforei3=scan(hh,i-3," ");
beforei4=scan(hh,i-4," ");
beforei5=scan(hh,i-5," ");
afteri=scan(hh,i+1," ");
afteri2=scan(hh,i+2," ");
afteri3=scan(hh,i+3," ");
afteri4=scan(hh,i+4," ");
afteri5=scan(hh,i+5," ");
j=trim(left(beforei5)) ||' '||trim(left(beforei4)) ||' '||trim(left(beforei3)) ||' '|| trim(left(beforei2)) ||' '|| trim(left(beforei)) ||' '|| 'keyword_1' ||' '|| trim(left(afteri)) ||' '|| trim(left(afteri2)) ||' '|| trim(left(afteri3)) ||' '|| trim(left(afteri4)) ||' '|| trim(left(afteri5));
end;
end;
keyword_2='absent';
if prxmatch( '/\s(keyword_2)\s/i', j) then keyword_2='Present';
run;
Here is an attempt at converting it to macro:
%MACRO PRTMAC(string, match_a, match_b, wn);
%local start stop position position;
%local RegexID found;
%local i x st en;
%global string_temp_1 string_temp_2 string_temp_3 retuen_variable;
%let RegexID=%sysfunc(prxparse('/([^\.]|\.(?=\d))+\./o'));
%let start=1;
%let position=0;
%let length=0;
%let stop=%length(&string);
%syscall prxnext(RegexID,start,stop,string,position,length);
%do %while(&position>0);
%let string_temp_1=substr(&string,&position,&length);
%if %sysfunc(find(&string_temp_1,match_a)) %then %let string_temp_2=&string_temp_1;
%syscall prxnext(RegexID,start,stop,string,position,length);
%end;
%do i=1 %to %sysfunc(countw(&string_temp_2," "));
%if %sysfunc(scan(&string_temp_2,i," "))=&match_a %then %do;
%if &wn > &i %then %let st=1;
%else %let st=&i-&wn;
%if &wn+&i > countw(&string_temp_2," ") %then %let en=countw(&string_temp_2," ");
%else %let en=&wn;
%do x=&st %to &en;
%let string_temp3=&string_temp3 ||' '|| %sysfunc(scan(string_temp_2,i-x," "));
%end;
%end;
%end;
%if %sysfunc(prxmatch( '/\s(match_b)\s/i', string_temp3)) %then %let retuen_variable='Present';
%else %let retuen_variable='Absent';
data d;
set analyse;
String_1=&string_temp_1;
String_2=&string_temp_2;
String_3=&string_temp_3;
Return=&retuen_variable;
run;
%mend;
data want;
set have;
%PRTMAC(result, "keyword", "test", 3);
run;
I am not getting this to work, any suggestion on any changes pease?
Kind regards
... View more