SAS Programming

DATA Step, Macro, Functions and more
BookmarkSubscribeRSS Feed
🔒 This topic is solved and locked. Need further help from the community? Please sign in and ask a new question.
margalits1
Obsidian | Level 7

Hi there!

I am a PhD student working with SAS and WRDS cloud.

I have a loop over the years 1990-2018 that stops processing without any availible log. Just gets stuck and that's it.

I checked the loop over shorter periods of time and it works just fine. For example, I checked for the years 1990-1992 and it works.

What could be the issue?

I thought maybe the problem was somehow due to session timeout? Any Ideas? 

 

1 ACCEPTED SOLUTION

Accepted Solutions
Kurt_Bremser
Super User

Instead of

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

do this:

proc append
  base=all_years_signs
  data=b
;
run;

Instead of

if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;

do

where
  datadate>=&start_date and datadate<=&end_date and
  INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
    finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
  )
;

WHERE conditions outperform subsetting IFs.

 

It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.

Doesn't WRDS use SAS/CONNECT?

View solution in original post

5 REPLIES 5
margalits1
Obsidian | Level 7
The relevant macro is calc year:
%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;

%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;



But here is the full code:
/*Calculating the sign*/

%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;

rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;


%mend;
%wrds_connect;

%macro combine_sic_codes;
data sic_codes; set home.sic_codes_q;
keep gvkey datadate sic;
run;

proc sort data=sic_codes nodupkey;
by gvkey datadate;
run;

/*Combine with SIC Codes. Not appear in cloud*/
data comp1; set comp.fundq;
run;

proc sort data=comp1 nodupkey;
by gvkey datadate;
run;

data comp2;
merge comp1 sic_codes;
by gvkey datadate;
run;

%mend;

%macro choose_years(year_sign);
%local / readonly start_year=%eval(&year_sign-6);
%local / readonly end_year=%eval(&year_sign-1);
%local / readonly start_date="01jan&start_year"d;
%local / readonly end_date="31dec&end_year"d;

data comp3; set comp2;
if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;
keep datadate sic fyr rdq gvkey FYEARQ FQTR DATACQTR cusip ceqq
XRDQ REVTQ MKVALTQ OANCFY NIY ATQ SALEQ IBQ;
run;
%mend;

%macro at_least_two_firms;
proc sql;
create table sic_helper as
select sic,datadate, count(distinct gvkey) as sic_count
from comp3
group by sic, datadate
having sic_count>=2;
quit;
run;

/*Keep only relevant sic codes*/

proc sql;
create table comp as
select * from comp3 c
where exists(
select * from sic_helper sh
where sh.sic = c.sic and sh.datadate = c.datadate);
quit;
run;

%mend;


%macro keep_n_consecutive_obs_for_var(num_obs, var_name, out_name);
proc sort data=comp;
by gvkey &var_name;
run;

proc means data=comp N noprint;
var &var_name;
by gvkey;
output out=&out_name N=NumFirmObs;
run;

data &out_name; set &out_name;
if NumFirmObs=&num_obs;
run;
%mend;

%macro consecutive_24_quarters;
/*keep only firms with 24 consequative quarters*/
%keep_n_consecutive_obs_for_var(24, rdq, Z1);
%keep_n_consecutive_obs_for_var(24, SALEQ, Z2);
%keep_n_consecutive_obs_for_var(24, IBQ, Z3); /*IBQ - income before extraordinary items*/

/*Select only firms with 24 consequtive obs*/
proc sql;
create table reg_data as
select * from comp
where
exists(select * from z1 where z1.gvkey = comp.gvkey) and
exists(select * from z2 where z2.gvkey = comp.gvkey) and
exists(select * from z3 where z3.gvkey = comp.gvkey);
run;

%mend;

%macro save_data_file_to_temp(data_name);
data temp.&data_name; set &data_name;
run;
%mend;


%macro export_data(data_name);
proc export data=&data_name
outfile='/scratch/tau/margalit/&data_name.csv'
dbms=csv
replace;
run;
%mend;

%macro rdq_min_max;
/*calcuate the report period - min and max date*/
proc sort data=reg_data;
by sic datadate;
run;

/*find minumin and maximum rdq date within SIC CODE*/
proc sql;
create table rdq_min_max as
select sic, datadate, min(rdq) as min, max(rdq) as max
from reg_data
group by sic, datadate;
run;

/*Format Dates*/
proc datasets lib=temp nolist;
modify rdq_min_max;
format max date9.;
format min date9.;
run;

%save_data_file_to_temp(rdq_min_max);
%mend;

%macro add_earning_surprise;
/*add previous quarter*/
proc sql;
create table reg_data1 as
select reg_data.*, b.ibq as last_ibq
from reg_data
join reg_data as b
on reg_data.gvkey = b.gvkey and
reg_data.fqtr = b.fqtr and
reg_data.fyearq = b.fyearq + 1;
run;

/*add agg ibq*/
proc sql;
create table agg as
select sic, datadate,
sum(last_ibq) as last_q_sum,
sum(ibq) as sum_q,
sum(ATQ) as sum_agg_atq
from reg_data1
group by sic, datadate;
run;

/*Combine with RegData table
reg_data: sum_all_others_without i, earning surprise firm
*/
proc sql;
create table reg_data3 as
select reg_data1.*,agg.last_q_sum, agg.sum_q,
/*earnings aggragate other firms*/
agg.sum_q - reg_data1.ibq as agg_ibq_without_i,
agg.last_q_sum - reg_data1.last_ibq as last_agg_ibq_without_i,
agg.sum_agg_atq - reg_data1.atq as agg_sum_atq_without_i,
(reg_data1.ibq - reg_data1.last_ibq) / reg_data1.atq as ear_surprise_i
from reg_data1
join agg
on reg_data1.sic = agg.sic and
reg_data1.datadate = agg.datadate;
run;

data reg_data3; set reg_data3;
earn_surprise_other=(agg_ibq_without_i-last_agg_ibq_without_i)/agg_sum_atq_without_i;
run;

data reg_data3; set reg_data3;
if earn_surprise_other=. then delete;
run;

%save_data_file_to_temp(reg_data3);
%mend;

%macro the_y_side(year_sign);
/*TODO: Split to separate Macros!!!*/
/*Create table with Comp and Compustat data*/
proc sql;
create table comp as
select a.*, b.lpermno as permno
from temp.reg_data3 as a, cc.ccmxpf_linktable as b
where
a.gvkey = b.gvkey and
b.linkprim in ('P', 'C') and
b.LINKTYPE in ('LU', 'LC') and
(a.datadate >= b.LINKDT or missing(b.LINKENDDT)) and
(a.datadate <= b.LINKENDDT or missing(b.LINKENDDT))
order by gvkey, datadate;
quit;
run;

/*Until this part- VVVV*/

/*Create table with Comp and Compustat data with daily stock data XX to XX days around quarterly earning ann*/
%local / readonly dsf_start_year=%eval(&year_sign-5);
%local / readonly dsf_end_year=&year_sign;
%local / readonly dsf_start_date="01jan&dsf_start_year"d;
%local / readonly dsf_end_date="31mar&dsf_end_year"d;

proc sql;
create table temp.stocks as
select comp.gvkey, dsf.ret, dsf.date, dsf.shrout, dsf.prc, dsf.cusip
from crspa.dsf, comp
where
dsf.permno = comp.permno and
dsf.date >= &dsf_start_date and
dsf.date <= &dsf_end_date;
quit;
run;

/*Not sure if to add*/
data temp.stocks; set temp.stocks;
proc sort nodup;
by gvkey date;
run;

/*compute returns*/
proc sql;
create table reg_min_max as
select rd.*, mm.min, mm.max
from comp rd
join temp.rdq_min_max mm
on rd.sic = mm.sic and
rd.datadate = mm.datadate;
run;

/*reg_min_max VVV*/

proc sql;
create table reg_stocks as
select s.*, rmm.sic, rmm.datadate, rmm.min, rmm.max
from temp.stocks s
join reg_min_max rmm
on s.gvkey = rmm.gvkey and
s.date between rmm.min and rmm.max;
run;

/*Not sure if to add*/
data reg_stocks; set reg_stocks;
proc sort nodup;
by gvkey date;
run;

proc sql;
create table reg_stocks_log as
select rs.*, LOG(rs.ret + 1) as lnret
from reg_stocks rs;
run;

proc sql;
create table returns as
select rsl.gvkey, rsl.sic, rsl.datadate,
exp(sum(rsl.lnret)) - 1 as ret_window
from reg_stocks_log rsl
group by rsl.gvkey, rsl.sic, rsl.datadate;
run;

/*%save_data_file_to_temp(returns);*/
%mend;

%macro combine_x_and_y(year_sign);
proc sql;
create table x_y as
select rd3.*, r.ret_window
from returns r
join reg_data3 rd3
on r.gvkey = rd3.gvkey and
r.datadate = rd3.datadate;
run;

proc sort data=x_y;
by sic gvkey datadate;
run;

%save_data_file_to_temp(x_y);

proc panel data = x_y outest=b noprint;
id gvkey datadate;
model ret_window = ear_surprise_i earn_surprise_other / fixone;
by sic;
quit;
run;

data b; set b;
year = &year_sign;
run;
%mend;

%macro calc_year(year);
/* Run signs regression for a single year */
%combine_sic_codes;
%choose_years(&year);
%at_least_two_firms;
%consecutive_24_quarters;
%rdq_min_max;
%add_earning_surprise;
%the_y_side(&year);
%combine_x_and_y(&year);

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

%macro program;
%local year;
%do year=1990 %to 2018;
%calc_year(&year);
%end;

%save_data_file_to_temp(all_years_signs);
%export_data(all_years_signs)
%mend;

%program;

Kurt_Bremser
Super User

Instead of

%if %sysfunc(exist(all_years_signs)) %then %do;
proc sql;
insert into all_years_signs
select * from b;
quit;
run;
%end;
%else %do;
proc sql;
create table all_years_signs as
select * from b;
quit;
run;
%end;
%mend;

do this:

proc append
  base=all_years_signs
  data=b
;
run;

Instead of

if datadate>=&start_date and datadate<=&end_date;
if INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C';
if finalq ne "Y" or iid ne "01" then delete;
if fyr~=12 then delete;
if rdq=. then delete;

do

where
  datadate>=&start_date and datadate<=&end_date and
  INDFMT='INDL' and DATAFMT='STD' and POPsrc='D' and CONSOL='C' and not(
    finalq ne "Y" or iid ne "01" or fyr~=12 or rdq=.
  )
;

WHERE conditions outperform subsetting IFs.

 

It might also be that your code runs so long it exceeds a limit that drops an inactive network connection.

Doesn't WRDS use SAS/CONNECT?

margalits1
Obsidian | Level 7
Hello Kurt
thank you I will try your suggested chnges and update whether it works 🙂
Yes WRDS uses SAS/connect:
I have it also in a macro:
%macro wrds_connect;
%let wrds=wrds-cloud.wharton.upenn.edu 4016;
options comamid=TCP remote=WRDS;
signon username=_prompt_;
run;

rsubmit;
libname crsp '/wrds/crsp/sasdata/a_stock';
libname temp '/scratch/tau/margalit';
libname cc '/wrds/crsp/sasdata/a_ccm';
libname index '/wrds/crsp/sasdata/a_indexes';
libname comp '/wrds/comp/sasdata/nam';
libname ix '/wrds/crsp/sasdata/ix';
libname home '/home/tau/margalit';
run;


%mend;
%wrds_connect;

Thanks again ! will update results
margalits1
Obsidian | Level 7

Hello Kurt,

I wanted to update you that I added the changes and the code worked.

Thank you very much for your help!

Really appricieate it.

Margalit

sas-innovate-wordmark-2025-midnight.png

Register Today!

Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.


Register now!

How to Concatenate Values

Learn how use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 5 replies
  • 1261 views
  • 1 like
  • 2 in conversation