BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
margalits1
Obsidian | Level 7

Hi there!

I am a PhD student working with SAS and WRDS cloud.

I have a loop over the years 1990-2018 that stops processing without any availible log. Just gets stuck and that's it.

I checked the loop over shorter periods of time and it works just fine. For example, I checked for the years 1990-1992 and it works.

What could be the issue?

I thought maybe the problem was somehow due to session timeout? Any Ideas? 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Kurt_Bremser
Super User

Instead of

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

do this:

proc append
  base=all_years_signs
  data=b
;
run;

Instead of

if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;

do

where
  datadate>=&start_date and datadate<=&end_date and
  INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
    finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
  )
;

WHERE conditions outperform subsetting IFs.

 

It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.

Doesn't WRDS use SAS/CONNECT?

View solution in original post

5 REPLIES 5
margalits1
Obsidian | Level 7
The relevant macro is calc year:
%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;

%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;



But here is the full code:
/*Calculating the sign*/

%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;

rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;


%mend;
%wrds_connect;

%macro combine_sic_codes;
data sic_codes; set home.sic_codes_q;
keep gvkey datadate sic;
run;

proc sort data=sic_codes nodupkey;
by gvkey datadate;
run;

/*Combine with SIC Codes. Not appear in cloud*/
data comp1; set comp.fundq;
run;

proc sort data=comp1 nodupkey;
by gvkey datadate;
run;

data comp2;
merge comp1 sic_codes;
by gvkey datadate;
run;

%mend;

%macro choose_years(year_sign);
%local / readonly start_year=%eval(&year_sign-6);
%local / readonly end_year=%eval(&year_sign-1);
%local / readonly start_date="01jan&start_year"d;
%local / readonly end_date="31dec&end_year"d;

data comp3; set comp2;
if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;
keep datadate sic fyr rdq gvkey FYEARQ FQTR DATACQTR cusip ceqq
XRDQ REVTQ MKVALTQ OANCFY NIY ATQ SALEQ IBQ;
run;
%mend;

%macro at_least_two_firms;
proc sql;
create table sic_helper as
select sic,datadate, count(distinct gvkey) as sic_count
from comp3
group by sic, datadate
having sic_count>=2;
quit;
run;

/*Keep only relevant sic codes*/

proc sql;
create table comp as
select * from comp3 c
where exists(
select * from sic_helper sh
where sh.sic = c.sic and sh.datadate = c.datadate);
quit;
run;

%mend;


%macro keep_n_consecutive_obs_for_var(num_obs, var_name, out_name);
proc sort data=comp;
by gvkey &var_name;
run;

proc means data=comp N noprint;
var &var_name;
by gvkey;
output out=&out_name N=NumFirmObs;
run;

data &out_name; set &out_name;
if NumFirmObs=&num_obs;
run;
%mend;

%macro consecutive_24_quarters;
/*keep only firms with 24 consequative quarters*/
%keep_n_consecutive_obs_for_var(24, rdq, Z1);
%keep_n_consecutive_obs_for_var(24, SALEQ, Z2);
%keep_n_consecutive_obs_for_var(24, IBQ, Z3); /*IBQ - income before extraordinary items*/

/*Select only firms with 24 consequtive obs*/
proc sql;
create table reg_data as
select * from comp
where
exists(select * from z1 where z1.gvkey = comp.gvkey) and
exists(select * from z2 where z2.gvkey = comp.gvkey) and
exists(select * from z3 where z3.gvkey = comp.gvkey);
run;

%mend;

%macro save_data_file_to_temp(data_name);
data temp.&data_name; set &data_name;
run;
%mend;


%macro export_data(data_name);
proc export data=&data_name
outfile='/scratch/tau/margalit/&data_name.csv'
dbms=csv
replace;
run;
%mend;

%macro rdq_min_max;
/*calcuate the report period - min and max date*/
proc sort data=reg_data;
by sic datadate;
run;

/*find minumin and maximum rdq date within SIC CODE*/
proc sql;
create table rdq_min_max as
select sic, datadate, min(rdq) as min, max(rdq) as max
from reg_data
group by sic, datadate;
run;

/*Format Dates*/
proc datasets lib=temp nolist;
modify rdq_min_max;
format max date9.;
format min date9.;
run;

%save_data_file_to_temp(rdq_min_max);
%mend;

%macro add_earning_surprise;
/*add previous quarter*/
proc sql;
create table reg_data1 as
select reg_data.*, b.ibq as last_ibq
from reg_data
join reg_data as b
on reg_data.gvkey = b.gvkey and
reg_data.fqtr = b.fqtr and
reg_data.fyearq = b.fyearq + 1;
run;

/*add agg ibq*/
proc sql;
create table agg as
select sic, datadate,
sum(last_ibq) as last_q_sum,
sum(ibq) as sum_q,
sum(ATQ) as sum_agg_atq
from reg_data1
group by sic, datadate;
run;

/*Combine with RegData table
reg_data: sum_all_others_without i, earning surprise firm
*/
proc sql;
create table reg_data3 as
select reg_data1.*,agg.last_q_sum, agg.sum_q,
/*earnings aggragate other firms*/
agg.sum_q - reg_data1.ibq as agg_ibq_without_i,
agg.last_q_sum - reg_data1.last_ibq as last_agg_ibq_without_i,
agg.sum_agg_atq - reg_data1.atq as agg_sum_atq_without_i,
(reg_data1.ibq - reg_data1.last_ibq) / reg_data1.atq as ear_surprise_i
from reg_data1
join agg
on reg_data1.sic = agg.sic and
reg_data1.datadate = agg.datadate;
run;

data reg_data3; set reg_data3;
earn_surprise_other=(agg_ibq_without_i-last_agg_ibq_without_i)/agg_sum_atq_without_i;
run;

data reg_data3; set reg_data3;
if earn_surprise_other=. then delete;
run;

%save_data_file_to_temp(reg_data3);
%mend;

%macro the_y_side(year_sign);
/*TODO: Split to separate Macros!!!*/
/*Create table with Comp and Compustat data*/
proc sql;
create table comp as
select a.*, b.lpermno as permno
from temp.reg_data3 as a, cc.ccmxpf_linktable as b
where
a.gvkey = b.gvkey and
b.linkprim in ('P', 'C') and
b.LINKTYPE in ('LU', 'LC') and
(a.datadate >= b.LINKDT or missing(b.LINKENDDT)) and
(a.datadate <= b.LINKENDDT or missing(b.LINKENDDT))
order by gvkey, datadate;
quit;
run;

/*Until this part- VVVV*/

/*Create table with Comp and Compustat data with daily stock data XX to XX days around quarterly earning ann*/
%local / readonly dsf_start_year=%eval(&year_sign-5);
%local / readonly dsf_end_year=&year_sign;
%local / readonly dsf_start_date="01jan&dsf_start_year"d;
%local / readonly dsf_end_date="31mar&dsf_end_year"d;

proc sql;
create table temp.stocks as
select comp.gvkey, dsf.ret, dsf.date, dsf.shrout, dsf.prc, dsf.cusip
from crspa.dsf, comp
where
dsf.permno = comp.permno and
dsf.date >= &dsf_start_date and
dsf.date <= &dsf_end_date;
quit;
run;

/*Not sure if to add*/
data temp.stocks; set temp.stocks;
proc sort nodup;
by gvkey date;
run;

/*compute returns*/
proc sql;
create table reg_min_max as
select rd.*, mm.min, mm.max
from comp rd
join temp.rdq_min_max mm
on rd.sic = mm.sic and
rd.datadate = mm.datadate;
run;

/*reg_min_max VVV*/

proc sql;
create table reg_stocks as
select s.*, rmm.sic, rmm.datadate, rmm.min, rmm.max
from temp.stocks s
join reg_min_max rmm
on s.gvkey = rmm.gvkey and
s.date between rmm.min and rmm.max;
run;

/*Not sure if to add*/
data reg_stocks; set reg_stocks;
proc sort nodup;
by gvkey date;
run;

proc sql;
create table reg_stocks_log as
select rs.*, LOG(rs.ret + 1) as lnret
from reg_stocks rs;
run;

proc sql;
create table returns as
select rsl.gvkey, rsl.sic, rsl.datadate,
exp(sum(rsl.lnret)) - 1 as ret_window
from reg_stocks_log rsl
group by rsl.gvkey, rsl.sic, rsl.datadate;
run;

/*%save_data_file_to_temp(returns);*/
%mend;

%macro combine_x_and_y(year_sign);
proc sql;
create table x_y as
select rd3.*, r.ret_window
from returns r
join reg_data3 rd3
on r.gvkey = rd3.gvkey and
r.datadate = rd3.datadate;
run;

proc sort data=x_y;
by sic gvkey datadate;
run;

%save_data_file_to_temp(x_y);

proc panel data = x_y outest=b noprint;
id gvkey datadate;
model ret_window = ear_surprise_i earn_surprise_other / fixone;
by sic;
quit;
run;

data b; set b;
year = &year_sign;
run;
%mend;

%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;

%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;

%program;

Kurt_Bremser
Super User

Instead of

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

do this:

proc append
  base=all_years_signs
  data=b
;
run;

Instead of

if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;

do

where
  datadate>=&start_date and datadate<=&end_date and
  INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
    finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
  )
;

WHERE conditions outperform subsetting IFs.

 

It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.

Doesn't WRDS use SAS/CONNECT?

margalits1
Obsidian | Level 7
Hello Kurt
thank you I will try your suggested chnges and update whether it works 🙂
Yes WRDS uses SAS/connect:
I have it also in a macro:
%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;

rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;


%mend;
%wrds_connect;

Thanks again ! will update results
margalits1
Obsidian | Level 7

Hello Kurt,

I wanted to update you that I added the changes and the code worked.

Thank you very much for your help!

Really appricieate it.

Margalit

SAS Innovate 2025: Call for Content

Are you ready for the spotlight? We're accepting content ideas for SAS Innovate 2025 to be held May 6-9 in Orlando, FL. The call is open until September 16. Read more here about why you should contribute and what is in it for you!

Submit your idea!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 5 replies
  • 999 views
  • 1 like
  • 2 in conversation