Solved: Parse a string with words as delimiters

34reqrwe · Posted 02-26-2019 02:15 PM

Hi,

I have a long text string which I would like to do the following.

1) find the position of each occurence of the text 'cn='

2) extract the text occuring after 'cn=' up until the next comma

3) for each occurence of 'cn=' within the string, create a new column with a 1/0 (or 1/missing) indicator . That column would take the name of the resulting text in 2) .

example code

data have;
	length string $200.;
	id=111;
	string = 'xxx,xxcn=test1,yyy,zz,cn=test2,dfd'; output;
	id=222;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=test2,dfd'; output;
run;

an example of what I want

data want;
	id = 111 ; test1 = 1 ; test2 = 1 ; test3=.; output;
	id = 222; test1 =  . ; test2 = 1; test3 =1; output;
run;

thanks

novinosrin · Posted 02-26-2019 03:43 PM


data have;
	length string $200.;
	id=111;
	string = 'xxx,xxcn=test1,yyy,zz,cn=test2,dfd'; output;
	id=222;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=test2,dfd'; output;
run;

data _null_;
set have end=lr;
do pos=find(string,'cn=') by 0 while(pos);
k=input(substr(string,pos+7,1),8.);
m=max(m,k);
pos = find(string, 'cn=', pos + 8) ; 
end;
max=max(m,max);
retain max;
if lr;
call symputx('n',max);
run;

%put &n;

data want;
set have;
array test(&n);
do pos=find(string,'cn=') by 0 while(pos);
t=substr(string,pos+3,5);
 do i=1 to dim(test);
	if vname(test(i))=t then test(i)=1;
end;
pos = find(string, 'cn=', pos + 8) ; 
end;
keep id test:;
run;

View solution in original post

novinosrin · Posted 02-26-2019 03:43 PM


data have;
	length string $200.;
	id=111;
	string = 'xxx,xxcn=test1,yyy,zz,cn=test2,dfd'; output;
	id=222;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=test2,dfd'; output;
run;

data _null_;
set have end=lr;
do pos=find(string,'cn=') by 0 while(pos);
k=input(substr(string,pos+7,1),8.);
m=max(m,k);
pos = find(string, 'cn=', pos + 8) ; 
end;
max=max(m,max);
retain max;
if lr;
call symputx('n',max);
run;

%put &n;

data want;
set have;
array test(&n);
do pos=find(string,'cn=') by 0 while(pos);
t=substr(string,pos+3,5);
 do i=1 to dim(test);
	if vname(test(i))=t then test(i)=1;
end;
pos = find(string, 'cn=', pos + 8) ; 
end;
keep id test:;
run;

34reqrwe · Posted 02-26-2019 04:07 PM

Thanks very much for taking the time to look at this . I should have been more prescriptive in my initial example and mentioned that the length of the text after 'cn=' can be a different length each time . e.g.

data have;
	length string $200.;
	id=111;
	string = 'xxx,xxcn=test1,yyy,zz,cn=test2,dfd'; output;
	id=222;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=test2,dfd'; output;
	id=333;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=somthingdiff,dfd'; output;
run;

But I can use your example to get what I need thanks

novinosrin · Posted 02-26-2019 04:08 PM

Yes please post the best representative sample. Typically that happens a lot which I call ding dong(back and forth)

Ksharp · Posted 02-27-2019 08:12 AM

data have;
	length string $200.;
	id=111;
	string = 'xxx,xxcn=test1,yyy,zz,cn=test2,dfd'; output;
	id=222;
	string = 'xadfds , xx,xxcn=test3,yyy,zz,cn=test2,dfd'; output;
run;
data temp;
 set have;
retain value 1;
pid=prxparse('/(?<=cn=)\w+/');
s=1;e=length(string);
call prxnext(pid,s,e,string,p,l);
do while(p>0);
 temp=substr(string,p,l);output;
 call prxnext(pid,s,e,string,p,l);
end;
drop pid s e p l string;
run;
proc transpose data=temp out=want;
by id;
id temp;
var value;
run;

34reqrwe · Posted 02-27-2019 03:13 PM

thats awesome. thanks

Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Registration is open

Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Re: Parse a string with words as delimiters

Registration is open

SAS Training: Just a Click Away