BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
Rakesh93
Calcite | Level 5

DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar


ans needed as

    Var1       Var2       Var3      Var4
1. Dinesh   Reddy
2. Rajesh   Rao       Reddy
3. Praveen Kumar    Rao      Kumar

total we need 4 variables

1 ACCEPTED SOLUTION

Accepted Solutions
novinosrin
Tourmaline | Level 20

data have;
	input name :$50.;
	datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;

data want;
 set have;
 array names {4} $ var1-var4;
 do _pos=anyupper(name)by 0 while(_pos);
  _p=_pos;
  _pos=anyupper(name,_pos+1);
  _n=sum(_n,1);
  if _pos then  names(_n)=substr(name,_p,_pos-_p);
  else names(_n)=substr(name,_p);
 end;
 drop _:;
run;
proc print noobs;run;
name var1 var2 var3 var4
DineshReddy Dinesh Reddy    
RajeshRaoReddy Rajesh Rao Reddy  
PraveenKumarRaoKumar Praveen Kumar Rao Kumar

View solution in original post

11 REPLIES 11
unison
Lapis Lazuli | Level 10

Something like this? Your delimiter is an UPPER case letter.

data have;
	input name :$50.;
	datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;

data want;
	set have;
	array names {4} $ var1-var4;

	do _i=1 to dim(names);
		_this_upper=anyupper(name, _this_upper+1);
		_next_upper=anyupper(name, _this_upper+1);

		if _next_upper=0 then
			_next_upper=length(name)+1;

		if _this_upper>0 then names[_i]=substr(name, _this_upper, _next_upper-_this_upper);
		else
			leave;
	end;
	drop _:;
run;

 

-unison
Rakesh93
Calcite | Level 5
Can you please explain how does this code works??
unison
Lapis Lazuli | Level 10

Here's a commented version -- tried to describe as best as possible..:

data want;
	set have;
	array names {4} $ var1-var4; *Create an array var1-var4;

	do _i=1 to dim(names); *Work across arrays i=1 to 4 in this case;
		*Use anyupper function to get positions of uppercase characters;
		*First parameter is string to search, second parameter is starting position to search;
		*-- returns 0 if there are no more uppercase characters after specified starting position.;
		_this_upper=anyupper(name, _this_upper+1); *position (index) of THIS upper character;
		_next_upper=anyupper(name, _this_upper+1); *position (index) of NEXT upper character;

		if _next_upper=0 then 
			_next_upper=length(name)+1; *If next upper is 0 (i.e. no more uppercase characters, then change to ending index +1 (helps to get very last chunk out properly);

		if _this_upper>0 then names[_i]=substr(name, _this_upper, _next_upper-_this_upper); *If this upper index is > 0 then capture the substr between this upper and the next one;
		else
			leave; *Else leave and move to next observation (i.e. go from row 1 to row 2);
	end;
	drop _:; *I've named all of my helper variables starting with '_', this drops all of them.;
run;
-unison
ed_sas_member
Meteorite | Level 14

Hi @Rakesh93 

 

here is a way to achieve this, using pearl regular expressions.

NB: the proc sql step enables you not to specify arbitrary 4 as the maximum count of words.

data have;
	input name $80.;
	datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;

data have1;
	set have;
	_name= prxchange('s/([A-Z])/ $1/',-1,name);
run;

proc sql noprint;
	select max(countw(_name)) into:nb_max from have1;
quit;

data want;
	set have1;
	array var(&nb_max) $20.;
	do i=1 to countw(_name);
		var(i)=scan(_name,i," ");
	end;
	drop i _name;
run;

 

novinosrin
Tourmaline | Level 20

data have;
	input name :$50.;
	datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;

data want;
 set have;
 array names {4} $ var1-var4;
 do _pos=anyupper(name)by 0 while(_pos);
  _p=_pos;
  _pos=anyupper(name,_pos+1);
  _n=sum(_n,1);
  if _pos then  names(_n)=substr(name,_p,_pos-_p);
  else names(_n)=substr(name,_p);
 end;
 drop _:;
run;
proc print noobs;run;
name var1 var2 var3 var4
DineshReddy Dinesh Reddy    
RajeshRaoReddy Rajesh Rao Reddy  
PraveenKumarRaoKumar Praveen Kumar Rao Kumar
novinosrin
Tourmaline | Level 20

And a variant of the previous with COMPRESS and FINDC

 

data want;
 set have;
 array names {4} $ var1-var4;
  _t=strip(compress(name,,'kU'));
 do _pos=findc(compress(name),_t)by 0 while(_pos);
  _p=_pos;
  _pos=findc(compress(name),_t,_pos+1);
  _n=sum(_n,1);
  if _pos then  names(_n)=substr(name,_p,_pos-_p);
  else names(_n)=substr(name,_p);
 end;
 drop _:;
run;
proc print noobs;run;
name var1 var2 var3 var4
DineshReddy Dinesh Reddy    
RajeshRaoReddy Rajesh Rao Reddy  
PraveenKumarRaoKumar Praveen Kumar Rao Kumar
mkeintz
PROC Star

@Rakesh93 

 

This code loops through the original NAME letter by letter, appending the individual letter to the "current" name component.  Every time the loop encounters an uppercase letter, it just increments which word is considered current:

 

data have;
  input name :$30.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;

data want (drop=L w);
  set have;
  array n {4} $10 name1-name4;
  do L=1 to length(name);
    if char(name,L) = upcase(char(name,L)) then w=sum(w,1);
    n{w}=cats(n{w},char(name,L));
  end;
run;
--------------------------
The hash OUTPUT method will overwrite a SAS data set, but not append. That can be costly. Consider voting for Add a HASH object method which would append a hash object to an existing SAS data set

Would enabling PROC SORT to simultaneously output multiple datasets be useful? Then vote for
Allow PROC SORT to output multiple datasets

--------------------------
Ksharp
Super User
data have;
  input name :$30.;
datalines;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
data temp;
set have;
id+1;
pid=prxparse('/[A-Z][a-z]+/');
s=1;e=length(name);
call prxnext(pid,s,e,name,p,l);
do while(p>0);
 var=substr(name,p,l);
 output;
call prxnext(pid,s,e,name,p,l);
end;
keep id var;
run;
proc transpose data=temp out=want prefix=var_;
by id;
var var;
run;
Satish_Parida
Lapis Lazuli | Level 10

This works

 

data have;
input names:$50.;
cards;
DineshReddy
RajeshRaoReddy
PraveenKumarRaoKumar
;
run;


data temp(drop=i);
set have;
 do i=1 to length(names)+1;
 	if (rank(substr(names,i,1)) gt 64 and rank(substr(names,i,1)) lt 91 and i ne 1) or i=length(names)+1 then do;
 		output;
 		name=substr(names,i,1);
 	end;
 	else name=cats(name,substr(names,i,1));
 end;
run;

proc sort data=temp;
by names;
run;

proc transpose data=temp out=want(drop=_name_);
by names;
var name;
run;

 

Please let us know if it worked for you.

sas-innovate-2024.png

Available on demand!

Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.

 

Register now!

Mastering the WHERE Clause in PROC SQL

SAS' Charu Shankar shares her PROC SQL expertise by showing you how to master the WHERE clause using real winter weather data.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 11 replies
  • 4849 views
  • 4 likes
  • 8 in conversation