Hey @Shmuel, thanks for the revised program. Here are some issues I ran into: 1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
70
71 /********************************************************************/
72 /* START RERUN TO REPRODUCE PAPER.CEO_FIRM DATASET */
73 /********************************************************************/
74
75 /* IF YOU HAVE STORAGE ISSUE DELETE THE LAST CREATED PAPER.CEO_FIRM */
76 proc datasets lib=paper;
76 ! delete ceo_firm nolist; quit;
NOTE: The file PAPER.NOLIST (memtype=DATA) was not found, but appears on a DELETE statement.
NOTE: Deleting PAPER.CEO_FIRM (memtype=DATA).
NOTE: PROCEDURE DATASETS used (Total process time):
real time 0.05 seconds
user cpu time 0.05 seconds
system cpu time 0.00 seconds
memory 2160.40k
OS Memory 28072.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 23 Switch Count 1
Page Faults 0
Page Reclaims 1090
Page Swaps 0
Voluntary Context Switches 25
Involuntary Context Switches 0
Block Input Operations 32
Block Output Operations 16
77
78 /* ============= RECREATE PAPER.CEO_FIRM ========================== */
79
80 *Merge CEO data and firm data;
81 DATA paper.ceo_firm_1;
82 length ticker $5;
83 merge paper.CSRP_annual_returns2 (in=in1)
84 paper.compustat_execucomp4 (in=in2);
85 by ticker;
86
87 if in1 and in2;
88 run;
NOTE: There were 10586 observations read from the data set PAPER.CSRP_ANNUAL_RETURNS2.
NOTE: There were 13355 observations read from the data set PAPER.COMPUSTAT_EXECUCOMP4.
NOTE: The data set PAPER.CEO_FIRM_1 has 12727 observations and 108 variables.
NOTE: DATA statement used (Total process time):
real time 0.13 seconds
user cpu time 0.02 seconds
system cpu time 0.05 seconds
memory 4219.21k
OS Memory 33968.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 24 Switch Count 5
Page Faults 0
Page Reclaims 1136
Page Swaps 0
Voluntary Context Switches 1072
Involuntary Context Switches 3
Block Input Operations 55104
Block Output Operations 51976
89
90 *remove missing return data;
91 *create dummy variable that indicates whether the firm's market value increases in a given year;
92
93 data paper.ceo_firm_2;
94 set paper.ceo_firm_1 (rename=(BONUS=_BONUS STOCK_AWARDS=_STOCK_AWARDS));
95 BONUS=input(_BONUS, ?? 8.);
96 STOCK_AWARDS=input(_STOCK_AWARDS, ?? 8.);
97
98 if annual_return=. then
99 delete;
100 run;
NOTE: There were 12727 observations read from the data set PAPER.CEO_FIRM_1.
NOTE: The data set PAPER.CEO_FIRM_2 has 12727 observations and 110 variables.
NOTE: DATA statement used (Total process time):
real time 0.12 seconds
user cpu time 0.01 seconds
system cpu time 0.03 seconds
memory 3640.34k
OS Memory 33452.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 25 Switch Count 1
Page Faults 0
Page Reclaims 566
Page Swaps 0
Voluntary Context Switches 1082
Involuntary Context Switches 1
Block Input Operations 52000
Block Output Operations 52744
101
102 data paper.ceo_firm;
103 set paper.ceo_firm_temp_2;
ERROR: File PAPER.CEO_FIRM_TEMP_2.DATA does not exist.
104 by ticker year;
105 last_return=lag(annual_return);
106
107 if not first.ticker then
108 increase=(annual_return > last_return);
109 else
110 increase=.;
111
112 if gender='MALE' then male=1; else male=0;
113 run;
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set PAPER.CEO_FIRM may be incomplete. When this step was stopped there were 0 observations and 5 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 498.06k
OS Memory 30632.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 26 Switch Count 1
Page Faults 0
Page Reclaims 101
Page Swaps 0
Voluntary Context Switches 30
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
114
115 /* DELETE TEMPORARY DATASETS */
116 proc datasets lib=paper;
116 ! delete ceo_firm_1 ceo_firm_2 nolist; quit;
NOTE: The file PAPER.NOLIST (memtype=DATA) was not found, but appears on a DELETE statement.
NOTE: Deleting PAPER.CEO_FIRM_1 (memtype=DATA).
NOTE: Deleting PAPER.CEO_FIRM_2 (memtype=DATA).
NOTE: PROCEDURE DATASETS used (Total process time):
real time 0.05 seconds
user cpu time 0.04 seconds
system cpu time 0.01 seconds
memory 636.75k
OS Memory 30888.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 27 Switch Count 2
Page Faults 0
Page Reclaims 103
Page Swaps 0
Voluntary Context Switches 50
Involuntary Context Switches 0
Block Input Operations 32
Block Output Operations 24
117
118 /* CHECK FOR DUPLICATES */
119 data is_dup;
120 set paper.CSRP_Monthly_Stock_char4; /* dataset origin containing var to add VOL ? */
121 by ticker year;
122 if not (firt.year and last.year);
_________
557
ERROR: DATA STEP Component Object failure. Aborted during the COMPILATION phase.
ERROR 557-185: Variable firt is not an object.
NOTE: The SAS System stopped processing this step because of errors.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1979.90k
OS Memory 31656.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 28 Switch Count 0
Page Faults 0
Page Reclaims 257
Page Swaps 0
Voluntary Context Switches 9
Involuntary Context Switches 0
Block Input Operations 288
Block Output Operations 0
123 run;
124
125 * are there duplicates ? ;
126 * if positive sort the dataset with NODUPKEY option ;
127 * alternativeley add more key variables to to join in next sql;
128
129 proc sort data=paper.CSRP_Monthly_Stock_char4 (keep ticker year VOL)
____ ______
12 12
ERROR 12-63: Missing '=' for option KEEP.
130 /* replace VOL to the required var */
131 out=crsp_nodup NODUPKEY;
132 by ticker year;
133 run;
NOTE: The SAS System stopped processing this step because of errors.
WARNING: The data set WORK.CRSP_NODUP may be incomplete. When this step was stopped there were 0 observations and 0 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1658.37k
OS Memory 31656.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 29 Switch Count 1
Page Faults 0
Page Reclaims 265
Page Swaps 0
Voluntary Context Switches 8
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 160
134
135 * add the required var to paper.ceo_firm dataset ;
136 *bring variable in from other data set;
137 proc sql;
138 create table want as
139 select a.* , b.VOL
140 from paper.ceo_firm as a
141 left join paper.crsp_nodup /*CSRP_Monthly_Stock_char4*/ as b on a.ticker = b.ticker and a.year = b.year
141 ! /* and a.key = b.key ?????? */ ; quit;
ERROR: File PAPER.CRSP_NODUP.DATA does not exist.
NOTE: PROC SQL set option NOEXEC and will continue to check the syntax of statements.
NOTE: The SAS System stopped processing this step because of errors.
NOTE: PROCEDURE SQL used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 323.43k
OS Memory 30888.00k
Timestamp 04/14/2018 02:44:04 PM
Step Count 30 Switch Count 0
Page Faults 0
Page Reclaims 84
Page Swaps 0
Voluntary Context Switches 12
Involuntary Context Switches 0
Block Input Operations 288
Block Output Operations 0
142
143
144 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
156 Here is the code again: libname paper "~/425/425_Final_Paper";
*import compustat dataset;
PROC IMPORT out=paper.CompuStat_Execucomp
datafile="~/425/425_Final_Paper/CompuStat_Execucomp.csv" DBMS=CSV replace;
getnames=yes;
guessingrows=2000;
run;
*keep only CEOs;
data paper.Compustat_ExecuComp2;
set paper.Compustat_ExecuComp;
if CEOANN='CEO';
run;
*** extra DATA step for checking previous data step results by comparing number of records selected;
data check_CEOANN;
set paper.Compustat_ExecuComp;
if CEOANN=: 'CEO';
run;
proc sort data=paper.Compustat_ExecuComp2;
by ticker year;
run;
*import csrp dataset;
PROC IMPORT out=paper.CSRP_Monthly_Stock_char
datafile="~/425/425_Final_Paper/CSRP_MonthlyStock_char.csv" DBMS=CSV replace;
getnames=yes;
guessingrows=max;
run;
proc contents data=paper.CSRP_Monthly_Stock_char;
run;
*remove bad data;
data paper.CSRP_Monthly_Stock_char2;
set paper.CSRP_Monthly_Stock_char (rename=(ret=character_ret));
drop character_ret;
if cusip=' ' then
delete;
ret=input(character_ret, ??8.);
if ret=. then
delete;
date=input(put(date, z8.), yymmdd8.);
format date yymmdd10.;
year=year(date);
month=month(date);
if cusip=: '?' then
cusip=substr(cusip, 2);
run;
proc contents data=paper.CSRP_Monthly_Stock_char;
run;
proc contents data=paper.CSRP_Monthly_Stock_char2;
run;
proc contents data=paper.multiple_CEOs;
run;
proc sort data=paper.CSRP_Monthly_Stock_char2;
by ticker year;
run;
proc sort data=paper.Compustat_ExecuComp2;
by ticker year;
run;
*Remove all bad years from both data sources;
data paper.Compustat_ExecuComp3;
length ticker $5;
merge paper.Compustat_ExecuComp2 paper.multiple_CEOs (keep=ticker year
in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then
delete;
run;
data paper.CSRP_Monthly_Stock_char3;
merge paper.CSRP_Monthly_Stock_char2 paper.multiple_CEOs (keep=ticker year
in=had_multiple_CEOs);
by ticker year;
if had_multiple_CEOs then
delete;
run;
*find additional bad data: multiple return records for the same month/year;
proc freq data=paper.CSRP_Monthly_Stock_char3;
tables ticker * year * month / noprint
out=paper.multiple_returns (where=(count > 1));
run;
*Remove all matching year data for multiple returns;
proc sort data=paper.multiple_returns out=multiple_returns (keep=ticker year)
NODUPKEY;
by ticker year;
run;
data paper.Compustat_ExecuComp4;
merge paper.Compustat_ExecuComp3 (in=keepme)
multiple_returns (in=had_multiple_returns);
by ticker year;
if keepme;
if had_multiple_returns then
delete;
run;
data paper.CSRP_Monthly_Stock_char4;
length ticker $5;
merge paper.CSRP_Monthly_Stock_char3
multiple_returns (in=had_multiple_returns);
by ticker year;
if had_multiple_returns then
delete;
run;
proc contents data=paper.CSRP_Monthly_Stock_char3;
run;
proc contents data=paper.CSRP_Monthly_Stock_char4;
run;
*create new variable annualized growth;
data paper.CSRP_annual_returns;
set paper.CSRP_Monthly_Stock_char4;
by ticker year;
retain annual_return 1;
annual_return=annual_return * (1 + RET);
if month=12 or last.ticker;
annual_return=(annual_return - 1) * 100;
output;
annual_return=1;
keep ticker year annual_return;
run;
*use proc contents to see if there is a type mismatch;
proc contents data=paper.CSRP_annual_returns;
run;
proc contents data=paper.Compustat_ExecuComp4;
run;
*MERGE statement has more than one data set with repeats of BY values;
proc sort data=paper.CSRP_annual_returns out=paper.CSRP_annual_returns2
nodupkey;
by ticker;
run;
/********************************************************************/
/* START RERUN TO REPRODUCE PAPER.CEO_FIRM DATASET */
/********************************************************************/
/* IF YOU HAVE STORAGE ISSUE DELETE THE LAST CREATED PAPER.CEO_FIRM */
proc datasets lib=paper; delete ceo_firm nolist; quit;
/* ============= RECREATE PAPER.CEO_FIRM ========================== */
*Merge CEO data and firm data;
DATA paper.ceo_firm_1;
length ticker $5;
merge paper.CSRP_annual_returns2 (in=in1)
paper.compustat_execucomp4 (in=in2);
by ticker;
if in1 and in2;
run;
*remove missing return data;
*create dummy variable that indicates whether the firm's market value increases in a given year;
data paper.ceo_firm_2;
set paper.ceo_firm_1 (rename=(BONUS=_BONUS STOCK_AWARDS=_STOCK_AWARDS));
BONUS=input(_BONUS, ?? 8.);
STOCK_AWARDS=input(_STOCK_AWARDS, ?? 8.);
if annual_return=. then
delete;
run;
data paper.ceo_firm;
set paper.ceo_firm_temp_2;
by ticker year;
last_return=lag(annual_return);
if not first.ticker then
increase=(annual_return > last_return);
else
increase=.;
if gender='MALE' then male=1; else male=0;
run;
/* DELETE TEMPORARY DATASETS */
proc datasets lib=paper; delete ceo_firm_1 ceo_firm_2 nolist; quit;
/* CHECK FOR DUPLICATES */
data is_dup;
set paper.CSRP_Monthly_Stock_char4; /* dataset origin containing var to add VOL ? */
by ticker year;
if not (firt.year and last.year);
run;
* are there duplicates ? ;
* if positive sort the dataset with NODUPKEY option ;
* alternativeley add more key variables to to join in next sql;
proc sort data=paper.CSRP_Monthly_Stock_char4 (keep ticker year VOL)
/* replace VOL to the required var */
out=crsp_nodup NODUPKEY;
by ticker year;
run;
* add the required var to paper.ceo_firm dataset ;
*bring variable in from other data set;
proc sql;
create table want as
select a.* , b.VOL
from paper.ceo_firm as a
left join paper.crsp_nodup /*CSRP_Monthly_Stock_char4*/ as b on a.ticker = b.ticker and a.year = b.year /* and a.key = b.key ?????? */ ; quit; I really appreciate the help!
... View more