BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
ammarhm
Lapis Lazuli | Level 10

Hi everyone

I am trying to convert the following code into a macro. It basically breaks a text into sentences and then picks a sentence that has 

keyword_1, then extracts a number of words (5) before and after that keyword and checks whether any of these words match 

keyword_2

so if you start with :

text text text text text text text text text text .text text text text text text text text text .text text text text text text text text text .text text text text text .  GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED. text text text text text text text text text text text text text text text text text text text 

If you use the keywords "sentence" and "should", the first part of the code picks up: 

text text .  GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED

The second part of the code will find the keyword "should" and return keyword_2='Present'.

 

 

Here is the working code: 

/* Split TEXTSTRING into sentences and write one row per sentence that
   contains the first keyword (case-insensitive). The sentence goes to HH. */
data want_1;
  set have;
  drop prxid start stop position length;

  /* One match = a run of characters closed by a period; a period that is
     directly followed by a digit can be part of the run. */
  prxid = prxparse('/([^\.]|\.(?=\d))+\./o');
  start = 1;
  stop  = length(textstring);

  /* Fetch matches one at a time until PRXNEXT reports no further match. */
  do until (not (position > 0));
    call prxnext(prxid, start, stop, textstring, position, length);
    if position > 0 then do;
      hh = substr(textstring, position, length);
      if find(hh, 'keyword_1', 'i') then output;
    end;
  end;
run;

/* Build a window of up to five words on either side of the first keyword
   and flag whether the second keyword occurs inside that window.
   Input : WANT_1, one row per sentence, sentence text in HH.
   Output: WANT_2 with the neighbouring words (BEFOREI.., AFTERI..), the
           window J, and KEYWORD_2 set to 'Present' or 'absent'. */
data want_2;
  set want_1;
  do i = 1 to countw(hh, " ");
    /* Compare without regard to case, like the FIND(...,'i') that built WANT_1. */
    if upcase(scan(hh, i, " ")) = upcase("keyword_1") then do;
      /* Only look back as far as words exist: SCAN with a negative count
         reads from the END of the string, which would pull words from the
         tail of the sentence into the window. */
      if i > 1 then beforei  = scan(hh, i-1, " ");
      if i > 2 then beforei2 = scan(hh, i-2, " ");
      if i > 3 then beforei3 = scan(hh, i-3, " ");
      if i > 4 then beforei4 = scan(hh, i-4, " ");
      if i > 5 then beforei5 = scan(hh, i-5, " ");
      /* Looking past the last word simply yields a blank. */
      afteri  = scan(hh, i+1, " ");
      afteri2 = scan(hh, i+2, " ");
      afteri3 = scan(hh, i+3, " ");
      afteri4 = scan(hh, i+4, " ");
      afteri5 = scan(hh, i+5, " ");
      j = trim(left(beforei5)) ||' '|| trim(left(beforei4)) ||' '|| trim(left(beforei3)) ||' '||
          trim(left(beforei2)) ||' '|| trim(left(beforei)) ||' '|| 'keyword_1' ||' '||
          trim(left(afteri)) ||' '|| trim(left(afteri2)) ||' '|| trim(left(afteri3)) ||' '||
          trim(left(afteri4)) ||' '|| trim(left(afteri5));
    end;
  end;

  /* Declare the length up front: without it the first assignment ('absent',
     6 characters) fixes the length and 'Present' is stored as 'Presen'. */
  length keyword_2 $7;
  keyword_2 = 'absent';
  /* Word boundaries instead of surrounding white space, so the keyword is
     also found when it is the very first word of the window. */
  if prxmatch('/\b(keyword_2)\b/i', j) then keyword_2 = 'Present';
run;

Here is an attempt at converting it to macro:


 
/* PRTMAC (first attempt): redoes the sentence split and the keyword search
   with macro statements (SYSFUNC / SYSCALL) and then emits a DATA step that
   copies the resulting macro variables into data set D.
   Parameters: string, match_a, match_b, wn (wn is compared with the word
   index i further down).
   NOTE(review): macro statements run while the code is being generated, so
   the STRING parameter is handled as text here, not as the value of a data
   set variable -- confirm this is what the caller expects. */
%MACRO PRTMAC(string, match_a, match_b, wn);

	/* NOTE(review): position is listed twice and length is not listed at all. */
	%local start stop position position; 
	%local RegexID found;
	%local i x st en;
	/* NOTE(review): declares string_temp_3, but the loop and the PRXMATCH test
	   below use string_temp3 (no second underscore). */
	%global string_temp_1 string_temp_2 string_temp_3 retuen_variable;
	%let RegexID=%sysfunc(prxparse('/([^\.]|\.(?=\d))+\./o'));
	%let start=1;
	%let position=0;
	%let length=0;
	%let stop=%length(&string);

	
%syscall prxnext(RegexID,start,stop,string,position,length);

 /* Walk over every match; string_temp_2 ends up holding the last match
    for which the FIND test succeeded. */
 %do %while(&position>0);
 /* NOTE(review): substr(...) is stored as plain text here; it is not wrapped
    in a macro function call. */
 %let string_temp_1=substr(&string,&position,&length);
	  /* NOTE(review): match_a is written without an ampersand in this test. */
	  %if %sysfunc(find(&string_temp_1,match_a)) %then %let string_temp_2=&string_temp_1;
	  %syscall prxnext(RegexID,start,stop,string,position,length);
 %end;


/* Look for match_a among the words of string_temp_2 and collect
   neighbouring words. */
%do i=1 %to %sysfunc(countw(&string_temp_2," "));
   	/* NOTE(review): the word index i is passed to SCAN without an ampersand. */
   	%if %sysfunc(scan(&string_temp_2,i," "))=&match_a %then %do;
		%if &wn > &i %then %let st=1; 
			%else %let st=&i-&wn;
		/* NOTE(review): countw(...) is not wrapped in a macro function call in
		   this comparison or in the assignment to en. */
		%if &wn+&i > countw(&string_temp_2," ") %then %let en=countw(&string_temp_2," "); 
			%else %let en=&wn;
		%do x=&st %to &en;
			/* NOTE(review): string_temp_2, i and x appear without ampersands
			   inside the SCAN call. */
			%let string_temp3=&string_temp3 ||' '|| %sysfunc(scan(string_temp_2,i-x," "));
		%end;
	%end;
%end;

/* NOTE(review): match_b and string_temp3 are written without ampersands. */
%if %sysfunc(prxmatch( '/\s(match_b)\s/i', string_temp3)) %then %let retuen_variable='Present';
	%else %let retuen_variable='Absent';
/* Emit a complete DATA step that writes the collected values to D. */
data d;
set analyse;
String_1=&string_temp_1;
String_2=&string_temp_2;
String_3=&string_temp_3;
Return=&retuen_variable;
run;


%mend;

/* Calls PRTMAC from inside an open DATA step.
   NOTE(review): PRTMAC itself generates a full "data d; ... run;" step, so
   that generated step would land in the middle of this one -- confirm intent. */
data want;
set have;

%PRTMAC(result, "keyword", "test", 3);


run;

I am not getting this to work. Any suggestions for changes, please?

Kind regards

 

1 ACCEPTED SOLUTION

Accepted Solutions
Patrick
Opal | Level 21

@ammarhm

Compare below macro with what you've posted. Do you get the difference?


/* MPRINT writes the SAS statements generated by macro execution to the log. */
options mprint;
/* PRTMAC: generates DATA step statements (call it inside a DATA step).
   It takes the first place in STRING where MATCH_A occurs as a word followed
   by white space, together with up to WN words before and after it, and sets
   TARGET_VAR to 1 when MATCH_B is one of the blank-delimited words of that
   stretch (case-insensitive), otherwise 0.
   Parameters:
     string     - name of the character variable to search
     match_a    - first keyword; inserted into the regular expression as is
     match_b    - second keyword; looked up with FINDW
     wn         - maximum number of words kept on each side of match_a
     target_var - name of the variable that receives the 1/0 result */
%macro prtmac(string, match_a, match_b,wn, target_var);

  /* Invocation counter: each call gets its own _prxid_N variable. */
  %global __counter;
  %let __counter=%eval(&__counter+1);

  /* Compile the pattern on the first observation only and keep the id. */
  retain _prxid_&__counter;
  if _n_=1 then _prxid_&__counter=prxparse("/(\b\w+\b\s+){0,&wn}(\b&match_a\b\s+)(\b\w+\b\s*){0,&wn}/i");
  _start=1;
  _stop=length(&string);
  /* Single PRXNEXT call: only the first match in the string is examined. */
  call prxnext(_prxid_&__counter, _start, _stop, &string, _pos, _len);
  /* The comparison yields 1 (found) or 0 (not found). */
  &target_var= findw(substrn(&string, _pos, _len),"&match_b",1,' ','i') > 0;
  /* Clear the match position and length after use. */
  call missing(_pos, _len);
  drop _prxid_&__counter _start _stop _pos _len;
	 
%mend;

/* Two searches on the same variable; results go to FOUND1 and FOUND2. */
data want;
  set have;
  %prtmac(string, sentence, extract, 5, found1)
  %prtmac(string, sentence, extracted, 5, found2)
run;

View solution in original post

17 REPLIES 17
WarrenKuhfeld
Rhodochrosite | Level 12

My suggestion is to do it in a DATA step.  I have written some long macros.  To me, the more I use vanilla SAS and the less I use macro, the better the macro is written.  DATA step is really good at processing text that comes from a user and might contain who knows what characters.  Macro, with all of its quoting functions, is more difficult.  If you need variations on a theme, use LINK and RETURN.  I did not try to figure out what you are doing wrong in macro, because I would never approach the problem that way.

Shmuel
Garnet | Level 18

Macro program is intended to generate code.

You have already the working code.

To convert your code into macro program, you should define,

which parts of your program should be dynamic - 

I suppose: name of input and output datasets, text arguments to search for, etc.

 

Why do you want to convert the search code into macro code ?

art297
Opal | Level 21

I don't think your "working" data step version works. Using the following example, want_1 ends up with NO records:

/* Test data: one TEXTSTRING plus two keyword fields read from the same line. */
data have;
  informat textstring $512.;
  input textstring & (keyword_1 keyword_2) ($);
  cards;
text text text text text text text text text text text text text text text text text text text text text text text text text text text text 5text. 4text. 3text. 2text. 1text.  GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED. atext. btext. ctext. dtext. etext. text text text text text text text text text text text text text text  sentence  should
;

/* Sentence splitter as posted: writes a row for every sentence in which the
   literal text 'keyword_1' is found (case-insensitive). */
data want_1;
  set have;
  drop prxid start stop position length;

  prxid = prxparse('/([^\.]|\.(?=\d))+\./o');
  start = 1;
  stop  = length(textstring);

  /* Fetch matches one at a time until PRXNEXT reports no further match. */
  do until (not (position > 0));
    call prxnext(prxid, start, stop, textstring, position, length);
    if position > 0 then do;
      hh = substr(textstring, position, length);
      if find(hh, 'keyword_1', 'i') then output;
    end;
  end;
run;

Art, CEO, AnalystFinder.com

ammarhm
Lapis Lazuli | Level 10

@art297 I don't think the code YOU added at the top is doing what it is expected to do. If you look at your "have" table, it does not contain the sentence that I am interested in.

 

Also, put the keyword directly in the code I wrote, and IT DOES WORK!

/* Split TEXTSTRING into sentences and keep every sentence that contains
   'sentence' (case-insensitive). The sentence text is stored in HH. */
data want_1;
  set have;
  drop prxid start stop position length;

  prxid = prxparse('/([^\.]|\.(?=\d))+\./o');
  start = 1;
  stop  = length(textstring);

  /* Fetch matches one at a time until PRXNEXT reports no further match. */
  do until (not (position > 0));
    call prxnext(prxid, start, stop, textstring, position, length);
    if position > 0 then do;
      hh = substr(textstring, position, length);
      if find(hh, 'sentence', 'i') then output;
    end;
  end;
run;
art297
Opal | Level 21

You have to provide your dataset HAVE in order for any of us to understand what you are trying to show. Since you hadn't done that initially, I had to guess at what you were trying to describe.

 

Art, CEO, AnalystFinder.com

 

ammarhm
Lapis Lazuli | Level 10

here you go 🙂


/* Single-row test data with the target sentence occurring twice. */
data have;

textstring="text text text text text text text text text text text text text text text text text GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED. text text 5text. 4text. 3text. 2text. 1text.  GOOD SENTENCE HERE THAT SHOULD BE EXTRACTED. atext. btext. ctext. dtext. etext. text text text text text text text text text text text text text text  ";
run;

/* Split TEXTSTRING into sentences and keep every sentence that contains
   'sentence' (case-insensitive). The sentence text is stored in HH. */
data want_1;
  set have;
  drop prxid start stop position length;

  prxid = prxparse('/([^\.]|\.(?=\d))+\./o');
  start = 1;
  stop  = length(textstring);

  /* Fetch matches one at a time until PRXNEXT reports no further match. */
  do until (not (position > 0));
    call prxnext(prxid, start, stop, textstring, position, length);
    if position > 0 then do;
      hh = substr(textstring, position, length);
      if find(hh, 'sentence', 'i') then output;
    end;
  end;
run;




/* Build a window of up to five words on either side of the word 'sentence'
   and flag whether 'should' occurs inside that window.
   Input : WANT_1, one row per sentence, sentence text in HH.
   Output: WANT_2 with the neighbouring words (BEFOREI.., AFTERI..), the
           window J, and KEYWORD_2 set to 'Present' or 'absent'. */
data want_2;
  set want_1;
  do i = 1 to countw(hh, " ");
    if scan(lowcase(hh), i, " ") = "sentence" then do;
      /* Only look back as far as words exist: SCAN with a negative count
         reads from the END of the string, which would pull words from the
         tail of the sentence into the window. */
      if i > 1 then beforei  = scan(hh, i-1, " ");
      if i > 2 then beforei2 = scan(hh, i-2, " ");
      if i > 3 then beforei3 = scan(hh, i-3, " ");
      if i > 4 then beforei4 = scan(hh, i-4, " ");
      if i > 5 then beforei5 = scan(hh, i-5, " ");
      /* Looking past the last word simply yields a blank. */
      afteri  = scan(hh, i+1, " ");
      afteri2 = scan(hh, i+2, " ");
      afteri3 = scan(hh, i+3, " ");
      afteri4 = scan(hh, i+4, " ");
      afteri5 = scan(hh, i+5, " ");
      /* Put the matched word itself in the middle of the window; the posted
         code still had the placeholder text 'keyword_1' here. */
      j = trim(left(beforei5)) ||' '|| trim(left(beforei4)) ||' '|| trim(left(beforei3)) ||' '||
          trim(left(beforei2)) ||' '|| trim(left(beforei)) ||' '|| trim(left(scan(hh, i, " "))) ||' '||
          trim(left(afteri)) ||' '|| trim(left(afteri2)) ||' '|| trim(left(afteri3)) ||' '||
          trim(left(afteri4)) ||' '|| trim(left(afteri5));
    end;
  end;

  /* Explicit length replaces the padded literal 'absent ' that was used to
     make room for 'Present'. */
  length keyword_2 $7;
  keyword_2 = 'absent';
  /* Word boundaries instead of surrounding white space, so the keyword is
     also found when it is the very first word of the window. */
  if prxmatch('/\b(should)\b/i', j) then keyword_2 = 'Present';
run;

Patrick
Opal | Level 21

@ammarhm

Get your SAS code as clean and simple as possible before you start macrotizing it.

Below a RegEx which only returns a string if it finds your first keyword. It also only returns max 5 words before and after your keyword which are in the same sentence.

/* Build one long test string: every data line read is appended
   (blank-separated) to STRING, and a single row is written once the
   tenth line has been added. */
data have;
  infile datalines truncover ;
  input _str $255.;
  length string $ 2000;
  /* keep the text collected so far across data lines */
  retain string;
  string=catx(' ',string,_str);
  /* write only the fully assembled string */
  if _n_=10 then output;
  drop _str;
  datalines;
text text text text text text text text text 
text .text text text text text 
text text text text .
text text text text text text text text 
text .text text text text text .  
GOOD SENTENCE HERE THAT 
SHOULD BE EXTRACTED. text 
text text text text text text 
text text text text text text 
text text text text text text 
;
run;

/* Flag rows in which 'extracted' appears within five words of the first
   occurrence of 'sentence' in STRING. FOUND is 1 or 0. */
data want;
  set have;
  retain _prxid;
  drop _prxid _start _stop _pos _len;

  /* Compile the pattern once, on the first row, and keep the id. */
  if _n_ = 1 then do;
    _prxid = prxparse('/(\b\w+\b\s+){0,5}(\bsentence\b\s+)(\b\w+\b\s*){0,5}/i');
  end;

  _start = 1;
  _stop  = length(string);
  call prxnext(_prxid, _start, _stop, string, _pos, _len);

  /* To inspect the matched stretch: sentence = substrn(string, _pos, _len) */
  if findw(substrn(string, _pos, _len), 'extracted', 1, ' ', 'i') > 0 then found = 1;
  else found = 0;
run;

 

ammarhm
Lapis Lazuli | Level 10

@Patrick

Thank you, very clever way of putting it together.

How would you please convert this part to a macro to input keyword_1 and keyword_2 and then output the result of the macro (found) to a new column?

 


/* Template of the search with both keywords still as placeholders:
   FOUND is 1 when keyword_2 appears within five words of the first
   occurrence of keyword_1 in STRING, otherwise 0. */
data want;
  set have;
  retain _prxid;
  drop _prxid _start _stop _pos _len;

  /* Compile the pattern once, on the first row, and keep the id. */
  if _n_ = 1 then do;
    _prxid = prxparse('/(\b\w+\b\s+){0,5}(\bkeyword_1\b\s+)(\b\w+\b\s*){0,5}/i');
  end;

  _start = 1;
  _stop  = length(string);
  call prxnext(_prxid, _start, _stop, string, _pos, _len);

  /* To inspect the matched stretch: sentence = substrn(string, _pos, _len) */
  if findw(substrn(string, _pos, _len), 'keyword_2', 1, ' ', 'i') > 0 then found = 1;
  else found = 0;
run;

Kind regards

 

ammarhm
Lapis Lazuli | Level 10

Here is what I have tried so far without luck, with (wn) being numerical representing number of words to extract:

 

/* PRTMAC (second attempt): macro-language version of the single-regex
   search. Parameters: string, match_a, match_b, wn (number of words to
   extract on each side). */
%MACRO PRTMAC(string, match_a, match_b,wn);

	%local _prxid _start _stop  _pos _len; 
	/* NOTE(review): found is declared local, so its value is not available
	   once the macro has finished. */
	%local sentence found;
	/* NOTE(review): _n_ is written as plain text in this macro condition, and
	   wn appears inside the pattern without an ampersand. */
	%if _n_=1 %then %let _prxid=%sysfunc(prxparse('/(\b\w+\b\s+){0,wn}(\b&match_a\b\s+)(\b\w+\b\s*){0,wn}/i'));
	%let _start=1;
	%let _pos=0;
	/* NOTE(review): this sets len, while the local list and the PRXNEXT call
	   use _len. */
	%let len=0;
	%let _stop=%length(&string);
%syscall prxnext(_prxid, _start, _stop, string, _pos, _len);
%let sentence=%sysfunc(substrn(&string, &_pos, &_len));
/* found is set to 1 on a hit and is left unset otherwise. */
%if %sysfunc(findw(&sentence,&match_b,1,' ','i'))> 0  %then %let found=1;
	 
%mend;

However, this is still generating an error and I can't return the value of "found".

Any help is appreciated

 

Patrick
Opal | Level 21

@ammarhm

Please answer first the questions @Shmuel asked. 

 

The macro code you've posted indicates that you eventually don't sufficiently understand how SAS macros should be used and how they work - "Macro program is intended to generate code"

 

In order to really help you with a SAS macro, we need first to understand how you intend to use it / what problem you're trying to solve that requires a SAS macro - "Why do you want to convert the search code into macro code?"

 

ammarhm
Lapis Lazuli | Level 10

@Patrick

 

 The reason is that I need to use this search function multiple times in the code I am writing. It seems easier to use a macro or function I can call every time I need it instead of copying the whole code multiple times.

 

I have a table that contains multiple columns, each containing text that needs to be searched with different keywords. The final code would need to repeat this process more than 50 times, so pasting the same code as above 50 times with different keywords is probably not an effective way of doing things.

I hope this explains things

Patrick
Opal | Level 21

@ammarhm

It does. The SAS macro should then only generate SAS code so all this code should remain on Base SAS code level. The only thing you need to macrotize are some variable names.

ammarhm
Lapis Lazuli | Level 10

@Patrick

Isn't that what I am trying to achieve? Making the variable names changeable...

 

so what I am trying to achieve

 

 


Data want;
set have;
.
.
.
column_found_1=%macro(column_1,keyword_1,keyword_2)
.
.
.
column_found_n=%macro(column_n,keyword_x,keyword_y);
run;
Patrick
Opal | Level 21

@ammarhm

Compare below macro with what you've posted. Do you get the difference?


/* MPRINT writes the SAS statements generated by macro execution to the log. */
options mprint;
/* PRTMAC: generates DATA step statements (call it inside a DATA step).
   It takes the first place in STRING where MATCH_A occurs as a word followed
   by white space, together with up to WN words before and after it, and sets
   TARGET_VAR to 1 when MATCH_B is one of the blank-delimited words of that
   stretch (case-insensitive), otherwise 0.
   Parameters:
     string     - name of the character variable to search
     match_a    - first keyword; inserted into the regular expression as is
     match_b    - second keyword; looked up with FINDW
     wn         - maximum number of words kept on each side of match_a
     target_var - name of the variable that receives the 1/0 result */
%macro prtmac(string, match_a, match_b,wn, target_var);

  /* Invocation counter: each call gets its own _prxid_N variable. */
  %global __counter;
  %let __counter=%eval(&__counter+1);

  /* Compile the pattern on the first observation only and keep the id. */
  retain _prxid_&__counter;
  if _n_=1 then _prxid_&__counter=prxparse("/(\b\w+\b\s+){0,&wn}(\b&match_a\b\s+)(\b\w+\b\s*){0,&wn}/i");
  _start=1;
  _stop=length(&string);
  /* Single PRXNEXT call: only the first match in the string is examined. */
  call prxnext(_prxid_&__counter, _start, _stop, &string, _pos, _len);
  /* The comparison yields 1 (found) or 0 (not found). */
  &target_var= findw(substrn(&string, _pos, _len),"&match_b",1,' ','i') > 0;
  /* Clear the match position and length after use. */
  call missing(_pos, _len);
  drop _prxid_&__counter _start _stop _pos _len;
	 
%mend;

/* Two searches on the same variable; results go to FOUND1 and FOUND2. */
data want;
  set have;
  %prtmac(string, sentence, extract, 5, found1)
  %prtmac(string, sentence, extracted, 5, found2)
run;

sas-innovate-2024.png

Join us for SAS Innovate April 16-19 at the Aria in Las Vegas. Bring the team and save big with our group pricing for a limited time only.

Pre-conference courses and tutorials are filling up fast and are always a sellout. Register today to reserve your seat.

 

Register now!

How to Concatenate Values

Learn how to use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 17 replies
  • 1896 views
  • 4 likes
  • 5 in conversation