Solved: Re: How to transpose and calculate percent of total by level

lydiawawa · Posted 02-29-2020 12:02 AM

Hi,

I have a dataset similar to the following structure (no duplicates) :

State county language state_total county_total

NJ aaa Span 1 1

MD bbb Span 2 1

MD ccc Span 2 1

NY aaa Arabic 15 6

NY aaa Chinese 15 9

CO ccc French 8 4

CO bbb French 7 4

I hope to transform the dataset by language and with percent of total by state and county calculated in the transposed dataset. The final outcome should look like this:

I tried to construct an array but got really stuck. Any help will be appreciated! Thank you!

ed_sas_member · Posted 02-29-2020 09:47 AM

Hi @lydiawawa

Thank you for the clarification!

Does the following code makes sense?

data have;
	input State $ county $ language $ state_total county_total;
	datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;

proc sort data=have;
	by state county;
run;

/* State */

proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
	var state_total;
	id language;
	by state county ;
run;

data have_tr_state2;
	set have_tr_state;
	array num (*) _numeric_;
	s = sum(of num(*));
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / s;
	end;
	drop i s;
run;

/* County */

proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
	var county_total;
	id language;
	by state county ;
run;

data have_tr_county2;
	set have_tr_county;
	array num (*) _numeric_;
	s = sum(of num(*));
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / s;
	end;
	drop i s;
run;

/* Merge */

data have_tr;
	merge have_tr_state2 have_tr_county2;
	by state county ;
	format state_: county_: 8.2;
run;

proc sql;
	select a.state, a.county, a.language,
		   b.state_Span, b.county_Span,
		   b.state_Arabic, b.county_Arabic,
		   b.state_Chinese, b.county_Chinese,
		   b.state_French, b.county_French			
	from have as a inner join have_tr as b
	on a.State=b.State and a.County=b.County;
quit;

View solution in original post

ed_sas_member · Posted 02-29-2020 03:10 AM

Hi @lydiawawa

Here is an attempt to achieve this.

I am not sure of the calculation of the percentages. Could you please clarify what should be the denominator?

Hope this helps,

Best,

data have;
	input State $ county $ language $ state_total county_total;
	datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;

proc sort data=have;
	by state county;
run;

/* State */

proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
	var state_total;
	id language;
	by state county ;
run;

data have_tr_state2;
	set have_tr_state;
	array num (*) _numeric_;
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / sum(of num(*));
	end;
	drop i;
run;

/* County */

proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
	var county_total;
	id language;
	by state county ;
run;

data have_tr_county2;
	set have_tr_county;
	array num (*) _numeric_;
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / sum(of num(*));
	end;
	drop i;
run;

/* Merge */

data have_tr;
	merge have_tr_state2 have_tr_county2;
	by state county ;
	format state_: county_: 8.2;
run;

proc sql;
	select a.state, a.county, a.language,
		   b.state_Span, b.county_Span,
		   b.state_Arabic, b.county_Arabic,
		   b.state_Chinese, b.county_Chinese,
		   b.state_French, b.county_French			
	from have as a inner join have_tr as b
	on a.State=b.State and a.County=b.County;
quit;

lydiawawa · Posted 02-29-2020 09:34 AM

the denominator should be the sum by level. For example, for state MD, county bbb, the percent of people speak Span(spanish) in the state of MD is 1 (4/4 = 100 %), thus state_span is 1, and the percent of people who speak spanish in MD and who belong to county bbb, county_span, is 0.5(50%) because 1/2 = 0.5. Hope this helps. Thank you so much!

lydiawawa · Posted 02-29-2020 09:40 AM

Sorry I made a mistake, in the county calculation in the previous example. The percent of people who speak spanish in MD and who belong to county bbb, county_span, should be 100% because in county bbb of MD, 1/1 =100% speak Spanish.

ed_sas_member · Posted 02-29-2020 09:47 AM

Hi @lydiawawa

Thank you for the clarification!

Does the following code makes sense?

data have;
	input State $ county $ language $ state_total county_total;
	datalines;
NJ aaa Span 1 1
MD bbb Span 2 1
MD ccc Span 2 1
NY aaa Arabic 15 6
NY aaa Chinese 15 9
CO ccc French 8 4
CO bbb French 7 4
;
run;

proc sort data=have;
	by state county;
run;

/* State */

proc transpose data=have out=have_tr_state (drop=_name_) prefix=state_;
	var state_total;
	id language;
	by state county ;
run;

data have_tr_state2;
	set have_tr_state;
	array num (*) _numeric_;
	s = sum(of num(*));
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / s;
	end;
	drop i s;
run;

/* County */

proc transpose data=have out=have_tr_county (drop=_name_) prefix=county_;
	var county_total;
	id language;
	by state county ;
run;

data have_tr_county2;
	set have_tr_county;
	array num (*) _numeric_;
	s = sum(of num(*));
	do i=1 to dim(num);
		if num(i) = . then num(i)=0;
		num(i) = num(i) / s;
	end;
	drop i s;
run;

/* Merge */

data have_tr;
	merge have_tr_state2 have_tr_county2;
	by state county ;
	format state_: county_: 8.2;
run;

proc sql;
	select a.state, a.county, a.language,
		   b.state_Span, b.county_Span,
		   b.state_Arabic, b.county_Arabic,
		   b.state_Chinese, b.county_Chinese,
		   b.state_French, b.county_French			
	from have as a inner join have_tr as b
	on a.State=b.State and a.County=b.County;
quit;

lydiawawa · Posted 02-29-2020 01:34 PM

I will let you know by Monday. Thank you so much!

Ready to join fellow brilliant minds for the SAS Hackathon?

Classroom Training Available!