- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Hi there!
I am a PhD student working with SAS and WRDS cloud.
I have a loop over the years 1990-2018 that stops processing without any availible log. Just gets stuck and that's it.
I checked the loop over shorter periods of time and it works just fine. For example, I checked for the years 1990-1992 and it works.
What could be the issue?
I thought maybe the problem was somehow due to session timeout? Any Ideas?
Accepted Solutions
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Instead of
%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;
do this:
proc append
base=all_years_signs
data=b
;
run;
Instead of
if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;
do
where
datadate>=&start_date and datadate<=&end_date and
INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
)
;
WHERE conditions outperform subsetting IFs.
It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.
Doesn't WRDS use SAS/CONNECT?
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Please post your code. You may have an issue where data keeps growing with longer loops, and/or processing time grows exponentially.
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);
%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;
%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;
%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;
But here is the full code:
/*Calculating the sign*/
%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;
rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;
%mend;
%wrds_connect;
%macro combine_sic_codes;
data sic_codes; set home.sic_codes_q;
keep gvkey datadate sic;
run;
proc sort data=sic_codes nodupkey;
by gvkey datadate;
run;
/*Combine with SIC Codes. Not appear in cloud*/
data comp1; set comp.fundq;
run;
proc sort data=comp1 nodupkey;
by gvkey datadate;
run;
data comp2;
merge comp1 sic_codes;
by gvkey datadate;
run;
%mend;
%macro choose_years(year_sign);
%local / readonly start_year=%eval(&year_sign-6);
%local / readonly end_year=%eval(&year_sign-1);
%local / readonly start_date="01jan&start_year"d;
%local / readonly end_date="31dec&end_year"d;
data comp3; set comp2;
if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;
keep datadate sic fyr rdq gvkey FYEARQ FQTR DATACQTR cusip ceqq
XRDQ REVTQ MKVALTQ OANCFY NIY ATQ SALEQ IBQ;
run;
%mend;
%macro at_least_two_firms;
proc sql;
create table sic_helper as
select sic,datadate, count(distinct gvkey) as sic_count
from comp3
group by sic, datadate
having sic_count>=2;
quit;
run;
/*Keep only relevant sic codes*/
proc sql;
create table comp as
select * from comp3 c
where exists(
select * from sic_helper sh
where sh.sic = c.sic and sh.datadate = c.datadate);
quit;
run;
%mend;
%macro keep_n_consecutive_obs_for_var(num_obs, var_name, out_name);
proc sort data=comp;
by gvkey &var_name;
run;
proc means data=comp N noprint;
var &var_name;
by gvkey;
output out=&out_name N=NumFirmObs;
run;
data &out_name; set &out_name;
if NumFirmObs=&num_obs;
run;
%mend;
%macro consecutive_24_quarters;
/*keep only firms with 24 consequative quarters*/
%keep_n_consecutive_obs_for_var(24, rdq, Z1);
%keep_n_consecutive_obs_for_var(24, SALEQ, Z2);
%keep_n_consecutive_obs_for_var(24, IBQ, Z3); /*IBQ - income before extraordinary items*/
/*Select only firms with 24 consequtive obs*/
proc sql;
create table reg_data as
select * from comp
where
exists(select * from z1 where z1.gvkey = comp.gvkey) and
exists(select * from z2 where z2.gvkey = comp.gvkey) and
exists(select * from z3 where z3.gvkey = comp.gvkey);
run;
%mend;
%macro save_data_file_to_temp(data_name);
data temp.&data_name; set &data_name;
run;
%mend;
%macro export_data(data_name);
proc export data=&data_name
outfile='/scratch/tau/margalit/&data_name.csv'
dbms=csv
replace;
run;
%mend;
%macro rdq_min_max;
/*calcuate the report period - min and max date*/
proc sort data=reg_data;
by sic datadate;
run;
/*find minumin and maximum rdq date within SIC CODE*/
proc sql;
create table rdq_min_max as
select sic, datadate, min(rdq) as min, max(rdq) as max
from reg_data
group by sic, datadate;
run;
/*Format Dates*/
proc datasets lib=temp nolist;
modify rdq_min_max;
format max date9.;
format min date9.;
run;
%save_data_file_to_temp(rdq_min_max);
%mend;
%macro add_earning_surprise;
/*add previous quarter*/
proc sql;
create table reg_data1 as
select reg_data.*, b.ibq as last_ibq
from reg_data
join reg_data as b
on reg_data.gvkey = b.gvkey and
reg_data.fqtr = b.fqtr and
reg_data.fyearq = b.fyearq + 1;
run;
/*add agg ibq*/
proc sql;
create table agg as
select sic, datadate,
sum(last_ibq) as last_q_sum,
sum(ibq) as sum_q,
sum(ATQ) as sum_agg_atq
from reg_data1
group by sic, datadate;
run;
/*Combine with RegData table
reg_data: sum_all_others_without i, earning surprise firm
*/
proc sql;
create table reg_data3 as
select reg_data1.*,agg.last_q_sum, agg.sum_q,
/*earnings aggragate other firms*/
agg.sum_q - reg_data1.ibq as agg_ibq_without_i,
agg.last_q_sum - reg_data1.last_ibq as last_agg_ibq_without_i,
agg.sum_agg_atq - reg_data1.atq as agg_sum_atq_without_i,
(reg_data1.ibq - reg_data1.last_ibq) / reg_data1.atq as ear_surprise_i
from reg_data1
join agg
on reg_data1.sic = agg.sic and
reg_data1.datadate = agg.datadate;
run;
data reg_data3; set reg_data3;
earn_surprise_other=(agg_ibq_without_i-last_agg_ibq_without_i)/agg_sum_atq_without_i;
run;
data reg_data3; set reg_data3;
if earn_surprise_other=. then delete;
run;
%save_data_file_to_temp(reg_data3);
%mend;
%macro the_y_side(year_sign);
/*TODO: Split to separate Macros!!!*/
/*Create table with Comp and Compustat data*/
proc sql;
create table comp as
select a.*, b.lpermno as permno
from temp.reg_data3 as a, cc.ccmxpf_linktable as b
where
a.gvkey = b.gvkey and
b.linkprim in ('P', 'C') and
b.LINKTYPE in ('LU', 'LC') and
(a.datadate >= b.LINKDT or missing(b.LINKENDDT)) and
(a.datadate <= b.LINKENDDT or missing(b.LINKENDDT))
order by gvkey, datadate;
quit;
run;
/*Until this part- VVVV*/
/*Create table with Comp and Compustat data with daily stock data XX to XX days around quarterly earning ann*/
%local / readonly dsf_start_year=%eval(&year_sign-5);
%local / readonly dsf_end_year=&year_sign;
%local / readonly dsf_start_date="01jan&dsf_start_year"d;
%local / readonly dsf_end_date="31mar&dsf_end_year"d;
proc sql;
create table temp.stocks as
select comp.gvkey, dsf.ret, dsf.date, dsf.shrout, dsf.prc, dsf.cusip
from crspa.dsf, comp
where
dsf.permno = comp.permno and
dsf.date >= &dsf_start_date and
dsf.date <= &dsf_end_date;
quit;
run;
/*Not sure if to add*/
data temp.stocks; set temp.stocks;
proc sort nodup;
by gvkey date;
run;
/*compute returns*/
proc sql;
create table reg_min_max as
select rd.*, mm.min, mm.max
from comp rd
join temp.rdq_min_max mm
on rd.sic = mm.sic and
rd.datadate = mm.datadate;
run;
/*reg_min_max VVV*/
proc sql;
create table reg_stocks as
select s.*, rmm.sic, rmm.datadate, rmm.min, rmm.max
from temp.stocks s
join reg_min_max rmm
on s.gvkey = rmm.gvkey and
s.date between rmm.min and rmm.max;
run;
/*Not sure if to add*/
data reg_stocks; set reg_stocks;
proc sort nodup;
by gvkey date;
run;
proc sql;
create table reg_stocks_log as
select rs.*, LOG(rs.ret + 1) as lnret
from reg_stocks rs;
run;
proc sql;
create table returns as
select rsl.gvkey, rsl.sic, rsl.datadate,
exp(sum(rsl.lnret)) - 1 as ret_window
from reg_stocks_log rsl
group by rsl.gvkey, rsl.sic, rsl.datadate;
run;
/*%save_data_file_to_temp(returns);*/
%mend;
%macro combine_x_and_y(year_sign);
proc sql;
create table x_y as
select rd3.*, r.ret_window
from returns r
join reg_data3 rd3
on r.gvkey = rd3.gvkey and
r.datadate = rd3.datadate;
run;
proc sort data=x_y;
by sic gvkey datadate;
run;
%save_data_file_to_temp(x_y);
proc panel data = x_y outest=b noprint;
id gvkey datadate;
model ret_window = ear_surprise_i earn_surprise_other / fixone;
by sic;
quit;
run;
data b; set b;
year = &year_sign;
run;
%mend;
%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);
%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;
%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;
%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;
%program;
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Instead of
%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;
do this:
proc append
base=all_years_signs
data=b
;
run;
Instead of
if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;
do
where
datadate>=&start_date and datadate<=&end_date and
INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
)
;
WHERE conditions outperform subsetting IFs.
It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.
Doesn't WRDS use SAS/CONNECT?
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
thank you I will try your suggested chnges and update whether it works 🙂
Yes WRDS uses SAS/connect:
I have it also in a macro:
%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;
rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;
%mend;
%wrds_connect;
Thanks again ! will update results
- Mark as New
- Bookmark
- Subscribe
- Mute
- RSS Feed
- Permalink
- Report Inappropriate Content
Hello Kurt,
I wanted to update you that I added the changes and the code worked.
Thank you very much for your help!
Really appricieate it.
Margalit