1. Assign the library:
libname paper "~/425/425_Final_Paper";
2. Open the data set:
proc print data=paper.ceo_firm2 (obs=5);
run;
@sastuck wrote:
If this is the case, then which line of code would you recommend I run so as to recreate the work table (?) ceo_firm2 ? Here's the program:
libname paper "~/425/425_Final_Paper";
*import compustat dataset;
PROC IMPORT out=paper.CompuStat_Execucomp
datafile="~/425/425_Final_Paper/CompuStat_Execucomp.csv" DBMS=CSV replace;
getnames=yes;
guessingrows=2000;
run;
*keep only CEOs;
data paper.Compustat_ExecuComp2;
set paper.Compustat_ExecuComp;
if CEOANN='CEO';
run;
*** extra DATA step for checking previous data step results by comparing number of records selected;
data check_CEOANN;
set paper.Compustat_ExecuComp;
if CEOANN=: 'CEO';
run;
proc sort data=paper.Compustat_ExecuComp2;
by ticker year;
run;
*import csrp dataset;
PROC IMPORT out=paper.CSRP_Monthly_Stock_char
datafile="~/425/425_Final_Paper/CSRP_MonthlyStock_char.csv" DBMS=CSV replace;
getnames=yes;
guessingrows=max;
run;
proc contents data=paper.CSRP_Monthly_Stock_char;
run;
*remove bad data;
data paper.CSRP_Monthly_Stock_char2;
set paper.CSRP_Monthly_Stock_char (rename=(ret=character_ret));
drop character_ret;
if cusip=' ' then
delete;
ret=input(character_ret, ??8.);
if ret=. then
delete;
date=input(put(date, z8.), yymmdd8.);
format date yymmdd10.;
year=year(date);
month=month(date);
if cusip=: '?' then
cusip=substr(cusip, 2);
run;
proc contents data=paper.CSRP_Monthly_Stock_char;
run;
proc contents data=paper.CSRP_Monthly_Stock_char2;
run;
proc contents data=paper.multiple_CEOs;
run;
proc sort data=paper.CSRP_Monthly_Stock_char2;
by ticker year;
run;
proc sort data=paper.Compustat_ExecuComp2;
by ticker year;
run;
*Remove all bad years from both data sources;
data paper.Compustat_ExecuComp3;
length ticker $5;
merge paper.Compustat_ExecuComp2 paper.multiple_CEOs (keep=ticker year
in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then
delete;
run;
data paper.CSRP_Monthly_Stock_char3;
merge paper.CSRP_Monthly_Stock_char2 paper.multiple_CEOs (keep=ticker year
in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then
delete;
run;
*find additional bad data: multiple return records for the same month/year;
proc freq data=paper.CSRP_Monthly_Stock_char3;
tables ticker * year * month / noprint
out=paper.multiple_returns (where=(count > 1));
run;
*Remove all matching year data for multiple returns;
proc sort data=paper.multiple_returns out=multiple_returns (keep=ticker year)
NODUPKEY;
by ticker year;
run;
data paper.Compustat_ExecuComp4;
merge paper.Compustat_ExecuComp3 (in=keepme)
multiple_returns (in=had_multiple_returns);
by ticker year;
if keepme;
if had_multiple_returns then
delete;
run;
data paper.CSRP_Monthly_Stock_char4;
length ticker $5;
merge paper.CSRP_Monthly_Stock_char3
multiple_returns (in=had_multiple_returns);
by ticker year;
if had_multiple_returns then
delete;
run;
proc contents data=paper.CSRP_Monthly_Stock_char3;
run;
proc contents data=paper.CSRP_Monthly_Stock_char4;
run;
*create new variable annualized growth;
data paper.CSRP_annual_returns;
set paper.CSRP_Monthly_Stock_char4;
by ticker year;
retain annual_return 1;
annual_return=annual_return * (1 + RET);
if month=12 or last.ticker;
annual_return=(annual_return - 1) * 100;
output;
annual_return=1;
keep ticker year annual_return;
run;
*use proc contents to see if there is a type mismatch;
proc contents data=paper.CSRP_annual_returns;
run;
proc contents data=paper.Compustat_ExecuComp4;
run;
*MERGE statement has more than one data set with repeats of BY values;
proc sort data=paper.CSRP_annual_returns out=paper.CSRP_annual_returns2
nodupkey;
by ticker;
run;
*Merge CEO data and firm data;
DATA paper.ceo_firm;
length ticker $5;
merge paper.CSRP_annual_returns2 (in=in1) paper.compustat_execucomp4 (in=in2);
by ticker;
if in1 and in2;
run;
proc contents data=paper.ceo_firm;
run;
proc contents data=paper.ceo_firm2;
run;
*remove missing return data;
data paper.ceo_firm;
set paper.ceo_firm;
if annual_return=. then
delete;
run;
proc means data=paper.CSRP_Monthly_Stock_char3 n nmiss;
var ret;
run;
proc means data=paper.Compustat_ExecuComp4 n nmiss;
var salary;
run;
proc means data=paper.CSRP_annual_returns n nmiss;
var annual_return;
run;
data paper.ceo_firm;
set paper.ceo_firm (rename=(BONUS=_BONUS STOCK_AWARDS=_STOCK_AWARDS));
BONUS=input(_BONUS, ?? 8.);
STOCK_AWARDS=input(_STOCK_AWARDS, ?? 8.);
run;
data paper.ceo_firm2;
set paper.ceo_firm2 (rename=(othcomp=_othcomp));
othcomp=input(_othcomp, ?? 8.);
run;
*create dummy variable that indicates whether the firm's market value increases in a given year;
data paper.ceo_firm;
set paper.ceo_firm;
by ticker year;
last_return=lag(annual_return);
if not first.ticker then
increase=(annual_return > last_return);
else
increase=.;
run;
*create dummy variable that indicates whether the CEO is male or female;
data paper.ceo_firm;
set paper.ceo_firm;
if gender='MALE' then male=1; else male=0;
run;
*bring variable in from other data set;
proc sql;
create table paper.ceo_firm2 as
select a.* , b.VOL
from paper.ceo_firm as a
left join
(select ticker
,year
,sum(VOL) as VOL
from paper.CSRP_Monthly_Stock_char4
group by ticker
,year
) as b
on a.ticker = b.ticker and
a.year = b.year;
quit;
/* CHECK FOR DUPLICATES */
data is_dup;
set paper.CSRP_Monthly_Stock_char4; /* dataset origin containing var to add VOL ? */
by ticker year;
if not (first.year and last.year);
run;
*find duplicates: ticker/year combo;
proc freq data=paper.CSRP_Monthly_Stock_char4;
tables ticker * year / noprint
out=paper.multiple_tyCSRP (where=(count > 1));
run;
proc freq data=paper.ceo_firm;
tables ticker * year / noprint
out=paper.multiple_tyfirm (where=(count > 1));
run;
*create variable log_salary;
data paper.ceo_firm;
set paper.ceo_firm;
if salary=0 then log_salary=0;
else log_salary=log(salary);
run;
*create variable log_annual_return;
data paper.ceo_firm;
set paper.ceo_firm;
if annual_return=0 then log_annual_return=0;
else log_annual_return=log(annual_return);
run;
*create variable log_bonus;
data paper.ceo_firm;
set paper.ceo_firm;
if bonus=0 then log_bonus=0;
else log_bonus=log(bonus);
run;
*create variable log_TOTAL_SEC;
data paper.ceo_firm;
set paper.ceo_firm;
if TOTAL_SEC=0 then log_TOTAL_SEC=0;
else log_TOTAL_SEC=log(TOTAL_SEC);
run;
*create interaction term;
data paper.ceo_firm2;
set paper.ceo_firm2;
annual_return_VOL=(annual_return*VOL);
run;
Thanks!
-SAStuck
... View more