Hi,
I have a dataset similar to the following structure (no duplicates) :
State county language state_total county_total
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
I hope to transform the dataset by language and with percent of total by state and county calculated in the transposed dataset. The final outcome should look like this:
I tried to construct an array but got really stuck. Any help will be appreciated! Thank you!
Hi @lydiawawa
Thank you for the clarification!
Does the following code makes sense?
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
Hi @lydiawawa
Here is an attempt to achieve this.
I am not sure of the calculation of the percentages. Could you please clarify what should be the denominator?
Hope this helps,
Best,
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / sum(of num(*));
end;
drop i;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / sum(of num(*));
end;
drop i;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
Hi @lydiawawa
Thank you for the clarification!
Does the following code makes sense?
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 25. Read more here about why you should contribute and what is in it for you!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.