Hello, I am modeling the effect of firm performance on CEO salary. SAS read 10,741 observations from my merged ceo_firm data set, but only used 2,066 of them. 8,675 observations had missing values. I am surprised by this, however, due to the substantive data cleaning I have done in preparation for running the regression. Do you have any idea why I could still be missing so many observations? Here is my code: libname paper "~/425/425_Final_Paper";
*import compustat dataset;
PROC IMPORT out=paper.CompuStat_Execucomp
datafile="~/425/425_Final_Paper/CompuStat_Execucomp.csv"
DBMS=CSV replace;
getnames= yes;
guessingrows=2000;
run;
*keep only CEOs;
data paper.Compustat_ExecuComp2;
set paper.Compustat_ExecuComp;
if CEOANN = 'CEO';
run;
*** extra DATA step for checking previous data step results by comparing number of records selected;
data check_CEOANN;
set paper.Compustat_ExecuComp;
if CEOANN =: 'CEO';
run;
proc sort data=paper.Compustat_ExecuComp2;
by ticker year;
run;
*import csrp dataset;
PROC IMPORT out=paper.CSRP_Monthly_Stock_char
datafile="~/425/425_Final_Paper/CSRP_MonthlyStock_char.csv"
DBMS=CSV replace;
getnames= yes;
guessingrows=max;
run;
*remove bad data;
data paper.CSRP_Monthly_Stock_char2;
set paper.CSRP_Monthly_Stock_char (rename=(ret=character_ret));
drop character_ret;
if cusip = ' ' then delete;
ret = input(character_ret, ??8.);
if ret = . then delete;
date = input(put(date, z8.), yymmdd8.);
format date yymmdd10.;
year = year(date);
month = month(date);
if cusip =: '?' then cusip = substr(cusip, 2);
run;
proc contents
data=paper.CSRP_Monthly_Stock_char2;
run;
proc sort data=paper.CSRP_Monthly_Stock_char2;
by ticker year;
run;
*Remove all bad years from both data sources;
data paper.Compustat_ExecuComp3;
merge paper.Compustat_ExecuComp2
paper.multiple_CEOs (keep=ticker year in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then delete;
run;
data paper.CSRP_Monthly_Stock_char3;
merge paper.CSRP_Monthly_Stock_char2
paper.multiple_CEOs (keep=ticker year in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then delete;
run;
*find additional bad data: multiple return records for the same month/year;
proc freq data=paper.CSRP_Monthly_Stock_char3;
tables ticker * year * month / noprint out=paper.multiple_returns (where=(count > 1));
run;
*Remove all matching year data for multiple returns;
proc sort data=paper.multiple_returns out=multiple_returns (keep=ticker year) NODUPKEY;
by ticker year;
run;
data paper.Compustat_ExecuComp4;
merge paper.Compustat_ExecuComp3
multiple_returns (in=had_multiple_returns);
by ticker year;
if had_multiple_returns then delete;
run;
data paper.CSRP_Monthly_Stock_char4;
merge paper.CSRP_Monthly_Stock_char3
multiple_returns (in=had_multiple_returns);
by ticker year;
if had_multiple_returns then delete;
run;
*create new variable annualized growth;
data paper.CSRP_annual_returns;
set paper.CSRP_Monthly_Stock_char4;
by ticker year;
retain annual_return 1;
annual_return = annual_return * (1 + RET);
if month = 12 or last.ticker;
annual_return = (annual_return - 1) * 100;
output;
annual_return = 1;
keep ticker year annual_return;
run;
*use proc contents to see if there is a type mismatch;
proc contents
data=paper.CSRP_annual_returns;
run;
proc contents
data=paper.Compustat_ExecuComp4;
run;
*MERGE statement has more than one data set with repeats of BY values;
proc sort data=paper.compustat_execucomp4;
by ticker;
run;
proc sort data=paper.CSRP_annual_returns nodupkey;
by ticker;
run;
data want;
merge paper.compustat_execucomp4 (in=in1) paper.CSRP_annual_returns;
by ticker;
if in1;
run;
*Merge CEO data and firm data;
DATA paper.ceo_firm ;
length ticker $5;
MERGE paper.CSRP_annual_returns
paper.compustat_execucomp4;
BY ticker;
RUN;
proc contents
data=paper.ceo_firm;
run;
*remove missing return data;
data paper.ceo_firm;
set paper.ceo_firm;
if annual_return = . then delete;
run;
proc means data=paper.CSRP_Monthly_Stock_char3 n nmiss;
var ret;
run;
*Use OLS to estimate model;
title "";
ods graphics on;
proc reg data=paper.ceo_firm plots(maxpoints=none);
model salary = annual_return /clb acov;
run;
ods graphics off; Thanks in advance for the help!
... View more