Hi,
I have a dataset similar to the following structure (no duplicates) :
State county language state_total county_total
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
I hope to transform the dataset by language and with percent of total by state and county calculated in the transposed dataset. The final outcome should look like this:
I tried to construct an array but got really stuck. Any help will be appreciated! Thank you!
Hi @lydiawawa
Thank you for the clarification!
Does the following code makes sense?
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
Hi @lydiawawa
Here is an attempt to achieve this.
I am not sure of the calculation of the percentages. Could you please clarify what should be the denominator?
Hope this helps,
Best,
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / sum(of num(*));
end;
drop i;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / sum(of num(*));
end;
drop i;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
Hi @lydiawawa
Thank you for the clarification!
Does the following code makes sense?
data have;
input State $ county $ language $ state_total county_total;
datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;
proc sort data=have;
by state county;
run;
/* State */
proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
var state_total;
id language;
by state county ;
run;
data have_tr_state2;
set have_tr_state;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* County */
proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
var county_total;
id language;
by state county ;
run;
data have_tr_county2;
set have_tr_county;
array num (*) _numeric_;
s = sum(of num(*));
do i=1 to dim(num);
if num(i) = . then num(i)=0;
num(i) = num(i) / s;
end;
drop i s;
run;
/* Merge */
data have_tr;
merge have_tr_state2 have_tr_county2;
by state county ;
format state_: county_: 8.2;
run;
proc sql;
select a.state, a.county, a.language,
b.state_Span, b.county_Span,
b.state_Arabic, b.county_Arabic,
b.state_Chinese, b.county_Chinese,
b.state_French, b.county_French
from have as a inner join have_tr as b
on a.State=b.State and a.County=b.County;
quit;
SAS Innovate 2025 is scheduled for May 6-9 in Orlando, FL. Sign up to be first to learn about the agenda and registration!
Learn how use the CAT functions in SAS to join values from multiple variables into a single value.
Find more tutorials on the SAS Users YouTube channel.
Ready to level-up your skills? Choose your own adventure.