DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
ans needed as
Var1 Var2 Var3 Var4
1. Dinesh Reddy
2. Rajesh Rao Reddy
3. Praveen Kumar Rao Kumar
total we need 4 variables
data have;
input name :$50.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;
data want;
set have;
array names {4} $ var1-var4;
do _pos=anyupper(name)by 0 while(_pos);
_p=_pos;
_pos=anyupper(name,_pos+1);
_n=sum(_n,1);
if _pos then names(_n)=substr(name,_p,_pos-_p);
else names(_n)=substr(name,_p);
end;
drop _:;
run;
proc print noobs;run;
name | var1 | var2 | var3 | var4 |
---|---|---|---|---|
DineshReddy | Dinesh | Reddy | ||
RajeshRaoReddy | Rajesh | Rao | Reddy | |
PraveenKumarRaoKumar | Praveen | Kumar | Rao | Kumar |
Something like this? Your delimiter is an UPPER case letter.
data have;
input name :$50.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;
data want;
set have;
array names {4} $ var1-var4;
do _i=1 to dim(names);
_this_upper=anyupper(name, _this_upper+1);
_next_upper=anyupper(name, _this_upper+1);
if _next_upper=0 then
_next_upper=length(name)+1;
if _this_upper>0 then names[_i]=substr(name, _this_upper, _next_upper-_this_upper);
else
leave;
end;
drop _:;
run;
Here's a commented version -- tried to describe as best as possible..:
data want;
set have;
array names {4} $ var1-var4; *Create an array var1-var4;
do _i=1 to dim(names); *Work across arrays i=1 to 4 in this case;
*Use anyupper function to get positions of uppercase characters;
*First parameter is string to search, second parameter is starting position to search;
*-- returns 0 if there are no more uppercase characters after specified starting position.;
_this_upper=anyupper(name, _this_upper+1); *position (index) of THIS upper character;
_next_upper=anyupper(name, _this_upper+1); *position (index) of NEXT upper character;
if _next_upper=0 then
_next_upper=length(name)+1; *If next upper is 0 (i.e. no more uppercase characters, then change to ending index +1 (helps to get very last chunk out properly);
if _this_upper>0 then names[_i]=substr(name, _this_upper, _next_upper-_this_upper); *If this upper index is > 0 then capture the substr between this upper and the next one;
else
leave; *Else leave and move to next observation (i.e. go from row 1 to row 2);
end;
drop _:; *I've named all of my helper variables starting with '_', this drops all of them.;
run;
Hi @Rakesh93
here is a way to achieve this, using pearl regular expressions.
NB: the proc sql step enables you not to specify arbitrary 4 as the maximum count of words.
data have;
input name $80.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;
data have1;
set have;
_name= prxchange('s/([A-Z])/ $1/',-1,name);
run;
proc sql noprint;
select max(countw(_name)) into:nb_max from have1;
quit;
data want;
set have1;
array var(&nb_max) $20.;
do i=1 to countw(_name);
var(i)=scan(_name,i," ");
end;
drop i _name;
run;
data have;
input name :$50.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;
data want;
set have;
array names {4} $ var1-var4;
do _pos=anyupper(name)by 0 while(_pos);
_p=_pos;
_pos=anyupper(name,_pos+1);
_n=sum(_n,1);
if _pos then names(_n)=substr(name,_p,_pos-_p);
else names(_n)=substr(name,_p);
end;
drop _:;
run;
proc print noobs;run;
name | var1 | var2 | var3 | var4 |
---|---|---|---|---|
DineshReddy | Dinesh | Reddy | ||
RajeshRaoReddy | Rajesh | Rao | Reddy | |
PraveenKumarRaoKumar | Praveen | Kumar | Rao | Kumar |
And a variant of the previous with COMPRESS and FINDC
data want;
set have;
array names {4} $ var1-var4;
_t=strip(compress(name,,'kU'));
do _pos=findc(compress(name),_t)by 0 while(_pos);
_p=_pos;
_pos=findc(compress(name),_t,_pos+1);
_n=sum(_n,1);
if _pos then names(_n)=substr(name,_p,_pos-_p);
else names(_n)=substr(name,_p);
end;
drop _:;
run;
proc print noobs;run;
name | var1 | var2 | var3 | var4 |
---|---|---|---|---|
DineshReddy | Dinesh | Reddy | ||
RajeshRaoReddy | Rajesh | Rao | Reddy | |
PraveenKumarRaoKumar | Praveen | Kumar | Rao | Kumar |
This code loops through the original NAME letter by letter, appending the individual letter to the "current" name component. Every time the loop encounters an uppercase letter, it just increments which word is considered current:
data have;
input name :$30.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
data want (drop=L w);
set have;
array n {4} $10 name1-name4;
do L=1 to length(name);
if char(name,L) = upcase(char(name,L)) then w=sum(w,1);
n{w}=cats(n{w},char(name,L));
end;
run;
data have;
input name :$30.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
data temp;
set have;
id+1;
pid=prxparse('/[A-Z][a-z]+/');
s=1;e=length(name);
call prxnext(pid,s,e,name,p,l);
do while(p>0);
var=substr(name,p,l);
output;
call prxnext(pid,s,e,name,p,l);
end;
keep id var;
run;
proc transpose data=temp out=want prefix=var_;
by id;
var var;
run;
This works
data have;
input names:$50.;
cards;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;
data temp(drop=i);
set have;
do i=1 to length(names)+1;
if (rank(substr(names,i,1)) gt 64 and rank(substr(names,i,1)) lt 91 and i ne 1) or i=length(names)+1 then do;
output;
name=substr(names,i,1);
end;
else name=cats(name,substr(names,i,1));
end;
run;
proc sort data=temp;
by names;
run;
proc transpose data=temp out=want(drop=_name_);
by names;
var name;
run;
Please let us know if it worked for you.
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.
Find more tutorials on the SAS Users YouTube channel.