I have the following data and I need to create a variable HHI which is the sum of the market share of four largest firms in each industry.
INDUSTRY YEAR FIRM SALES
1 2001 A 100
1 2001 B 200
1 2001 C 150
1 2001 D 50
1 2001 E 100
1 2001 F 400
1 2002 A 100
1 2002 B 130
1 2002 C 170
1 2002 D 190
1 2002 E 108
1 2002 F 100
2 2001 S 150
2 2001 T 250
2 2001 U 570
2 2001 V 550
2 2001 W 750
2 2001 X 560
2 2001 Y 450
I want to have a data set that looks like this:
INDUSTRY YEAR FIRM SALES sum_industry_sales large4sales market share weight (market share*market share) HHI
1 2001 A 50 1000 . . . 0.04+0.0225+0.0225+0.16=0.245
1 2001 B 200 1000 200 200/1000= 0.2 0.04 0.245
1 2001 C 150 1000 150 0.15 0.0225 0.245
1 2001 D 50 1000 . . . 0.245
1 2001 E 150 1000 150 0.15 0.0225 0.245
1 2001 F 400 1000 400 0.4 0.16 0.245
1 2002 A 100 798 . . . 0.15
1 2002 B 130 798 130 0.16 0.03 0.15
1 2002 C 170 798 170 0.21 0.04 0.15
1 2002 D 190 798 190 0.24 0.06 0.15
1 2002 E 108 798 108 0.14 0.02 0.15
1 2002 F 100 798 . . . 0.15
2 2001 S 150 3280 . . . 0.14
2 2001 T 250 3280 . . . 0.14
2 2001 U 570 3280 570 0.17 0.03 0.14
2 2001 V 550 3280 550 0.16 0.03 0.14
2 2001 W 750 3280 750 0.22 0.05 0.14
2 2001 X 560 3280 560 0. 17 0.03 0.14
2 2001 Y 450 3280 . . . 0.14
data have;
input INDUSTRY :YEAR : FIRM :$ SALES;
cards;
1 2001 A 100
1 2001 B 200
1 2001 C 150
1 2001 D 50
1 2001 E 100
1 2001 F 400
1 2002 A 100
1 2002 B 130
1 2002 C 170
1 2002 D 190
1 2002 E 108
1 2002 F 100
2 2001 S 150
2 2001 T 250
2 2001 U 570
2 2001 V 550
2 2001 W 750
2 2001 X 560
2 2001 Y 450
;
proc sql;
create table temp as select *,sum(sales) as subtotal,sales/calculated subtotal as m_share,
calculated m_share*calculated m_share as weight
from have
group by industry, year
order by industry, year, sales desc;
quit;
data temp(drop=count);
set temp;
large4sales=sales;
by industry year;
if first.year then count=0;
count+1;
if count>4 then do;
large4sales=.;
m_share=.;
weight=.;
end;
run;
proc sql;
create table want
as select *,sum(weight) as HHI
from temp
group by industry, year
order by industry, year,firm;
quit;
proc print;run;
Obs INDUSTRY YEAR FIRM SALES subtotal m_share weight large4sales HHI
1 1 2001 A 100 1000 0.10000 0.01000 100 0.23250
2 1 2001 B 200 1000 0.20000 0.04000 200 0.23250
3 1 2001 C 150 1000 0.15000 0.02250 150 0.23250
4 1 2001 D 50 1000 . . . 0.23250
5 1 2001 E 100 1000 . . . 0.23250
6 1 2001 F 400 1000 0.40000 0.16000 400 0.23250
7 1 2002 A 100 798 . . . 0.14693
8 1 2002 B 130 798 0.16291 0.02654 130 0.14693
9 1 2002 C 170 798 0.21303 0.04538 170 0.14693
10 1 2002 D 190 798 0.23810 0.05669 190 0.14693
11 1 2002 E 108 798 0.13534 0.01832 108 0.14693
12 1 2002 F 100 798 . . . 0.14693
13 2 2001 S 150 3280 . . . 0.13975
14 2 2001 T 250 3280 . . . 0.13975
15 2 2001 U 570 3280 0.17378 0.03020 570 0.13975
16 2 2001 V 550 3280 0.16768 0.02812 550 0.13975
17 2 2001 W 750 3280 0.22866 0.05228 750 0.13975
18 2 2001 X 560 3280 0.17073 0.02915 560 0.13975
19 2 2001 Y 450 3280 . . . 0.13975
data have;
input INDUSTRY :YEAR : FIRM :$ SALES;
cards;
1 2001 A 100
1 2001 B 200
1 2001 C 150
1 2001 D 50
1 2001 E 100
1 2001 F 400
1 2002 A 100
1 2002 B 130
1 2002 C 170
1 2002 D 190
1 2002 E 108
1 2002 F 100
2 2001 S 150
2 2001 T 250
2 2001 U 570
2 2001 V 550
2 2001 W 750
2 2001 X 560
2 2001 Y 450
;
proc sql;
create table temp as select *,sum(sales) as subtotal,sales/calculated subtotal as m_share,
calculated m_share*calculated m_share as weight
from have
group by industry, year
order by industry, year, sales desc;
quit;
data temp(drop=count);
set temp;
large4sales=sales;
by industry year;
if first.year then count=0;
count+1;
if count>4 then do;
large4sales=.;
m_share=.;
weight=.;
end;
run;
proc sql;
create table want
as select *,sum(weight) as HHI
from temp
group by industry, year
order by industry, year,firm;
quit;
proc print;run;
Obs INDUSTRY YEAR FIRM SALES subtotal m_share weight large4sales HHI
1 1 2001 A 100 1000 0.10000 0.01000 100 0.23250
2 1 2001 B 200 1000 0.20000 0.04000 200 0.23250
3 1 2001 C 150 1000 0.15000 0.02250 150 0.23250
4 1 2001 D 50 1000 . . . 0.23250
5 1 2001 E 100 1000 . . . 0.23250
6 1 2001 F 400 1000 0.40000 0.16000 400 0.23250
7 1 2002 A 100 798 . . . 0.14693
8 1 2002 B 130 798 0.16291 0.02654 130 0.14693
9 1 2002 C 170 798 0.21303 0.04538 170 0.14693
10 1 2002 D 190 798 0.23810 0.05669 190 0.14693
11 1 2002 E 108 798 0.13534 0.01832 108 0.14693
12 1 2002 F 100 798 . . . 0.14693
13 2 2001 S 150 3280 . . . 0.13975
14 2 2001 T 250 3280 . . . 0.13975
15 2 2001 U 570 3280 0.17378 0.03020 570 0.13975
16 2 2001 V 550 3280 0.16768 0.02812 550 0.13975
17 2 2001 W 750 3280 0.22866 0.05228 750 0.13975
18 2 2001 X 560 3280 0.17073 0.02915 560 0.13975
19 2 2001 Y 450 3280 . . . 0.13975
Thanks a lot.
Available on demand!
Missed SAS Innovate Las Vegas? Watch all the action for free! View the keynotes, general sessions and 22 breakouts on demand.
Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.
Find more tutorials on the SAS Users YouTube channel.