Bookmark | Subscribe | RSS Feed
Seonjin
Calcite | Level 5

Hello, I'm a student in the accounting department and I'm trying to do some calculations using SAS for the first time. 
Actually, I would like to ask for some help with Regression calculation. 
I have calculated the real earnings management and now I'm looking for a formula to calculate the REG. 
I will be very happy to have all of you as tutors to learn more and get used to the program.
Thank you very much.  

/* Assigning the USER libref makes every one-level dataset name below
   (XXX, VAR1, REM, ...) resolve to the folder C:\CRISIS instead of WORK. */
libname user 'C:\CRISIS' ;

/* Column dictionary for the raw file:
     b1=total assets,  b2=receivables, b3=provision, b4=inventory, b5=tangible assets, b6=land, b7=impairment, b8=subsidiary,
     b9=asset in construction, b10=impairment, b11=subsidiary, b12=liability
     i1=sales  i2=cost of goods sold  i3=sales and administrative expense  i4=tax and utility  i5=depreciation
     i6=rent   i7=insurance  i8=net income, i9=EPS, c1=operating cash flow */

/* Read the raw CSV into XXX, starting at the third record. */
data XXX ;
  /* DSD       : two consecutive commas mean a missing field. Without it the
                 commas collapse into one delimiter and every later column
                 shifts left.
     TRUNCOVER : a record with too few fields leaves the remaining variables
                 missing instead of continuing the read on the next record. */
  infile 'C:\CRISIS\KSP97.csv' dlm=',' dsd truncover firstobs=3 lrecl=20000 ;
  input code  yr mon  icode opn $ big b1-b12  i1-i9  c1  ;

  /* Zero total assets (b1) or zero sales (i1) cannot be used as a scaler
     or regressor later on. */
  if b1=0  or i1=0  then delete ;

  /* Keep only records with mon=12 (presumably December year-ends; confirm
     against the data provider's layout). */
  if mon=12 ;
run;

/* Keep a single observation per code-yr key (one row per firm-year).
   The first row of each key stays in XXX; all further rows with the same
   key are written to OO so they can be inspected. */
proc sort data=XXX  out=XXX  nodupkey  dupout=OO ;
  by code yr ;
run;


/* Number of observations in each year-industry cell. */
proc sort data=XXX ;
  by yr icode ;
run;

/* NOPRINT matters here: only the output dataset is wanted, not a listing.
   indnum = count of non-missing MON values within each CLASS combination. */
proc means data=XXX  noprint  n ;
  class yr icode ;
  var   mon ;
  output out=yyy  n=indnum ;
run;

/* PROC MEANS with CLASS also emits overall and one-way summary rows, in
   which yr and/or icode is missing. Keep only the full yr-by-icode cells. */
data indnum ;
  set yyy ;
  if yr ne .  and  icode ne . ;
  keep yr  icode  indnum ;
run;

/* Attach the year-industry observation count to the financial raw data
   and keep only cells that have at least 15 observations. */
proc sort data=indnum ;
  by yr icode ;
run;
proc sort data=XXX ;
  by yr icode ;
run;

data XX ;
  merge XXX  indnum ;
  by yr icode ;
  /* A missing indnum fails this test too, exactly as "indnum < 15" is
     true for a missing value. */
  if indnum >= 15 ;
run;


/* Variable construction for the real earnings management models */

proc sort data=XX ;
  by code yr ;
run;

data VAR1 ;
  set XX ;

  /* Column dictionary:
       b1=total assets,  b2=receivables, b3=provision, b4=inventory, b5=tangible assets, b6=land, b7=impairment, b8=subsidiary,
       b9=asset in construction, b10=impairment, b11=subsidiary, b12=liability
       i1=sales  i2=cost of goods sold  i3=sales and administrative expense  i4=tax and utility  i5=depreciation
       i6=rent   i7=insurance  i8=net income, i9=EPS, c1=operating cash flow */

  /* Values from the previous row (lag) and from two rows back (lag2).
     All LAG calls run on every observation, before the subsetting IF at
     the bottom, so each lag queue sees every row. */
  pcode1 = lag(code) ;
  pcode2 = lag2(code) ;
  pb1    = lag(b1) ;
  pb2    = lag(b2) ;
  pb4    = lag(b4) ;
  pi1    = lag(i1) ;
  ppi1   = lag2(i1) ;

  /* Changes between adjacent rows. */
  ds1 = i1  - pi1 ;       /* change in sales, current row vs previous row  */
  ds2 = pi1 - ppi1 ;      /* change in sales, previous row vs two rows back */
  db2 = b2  - pb2 ;       /* change in receivables                          */
  db4 = b4  - pb4 ;       /* change in inventory                            */

  pc = i2 + db4 ;                      /* cost of goods sold + change in inventory */
  de = i3 - i4 - i5 - i6 - i7 ;        /* i3 net of i4, i5, i6 and i7              */

  /* Model variables, all scaled by the previous row's total assets. */
  y1 = c1  / pb1 ;
  y2 = pc  / pb1 ;
  y3 = de  / pb1 ;
  x1 = 1   / pb1 ;
  x2 = i1  / pb1 ;
  x3 = ds1 / pb1 ;
  x4 = ds2 / pb1 ;
  x5 = pi1 / pb1 ;

  /* Keep a row only when it and the two rows before it carry the same code,
     so that no lag crosses from one code to another.
     NOTE(review): yr is not compared with the lagged rows. If a code has a
     gap in its year sequence, the lagged values come from a non-adjacent
     year. Confirm whether a consecutive-year check is needed. */
  if code = pcode1  and  pcode1 = pcode2 ;
run;
*proc print ;  run ;


/* Estimation of REM */
/* Outliers are winsorized at the 1st and 99th percentile within each
   year-industry cell. */

/* 1st and 99th percentile of every model variable per yr-icode cell. */
proc means data=VAR1  p1 p99  noprint ;
  var    y1-y3 x1-x5 ;
  class  yr icode ;
  output out=outlier
         p1 = p1_y1   p1_y2   p1_y3   p1_x1   p1_x2   p1_x3   p1_x4   p1_x5
         p99= p99_y1  p99_y2  p99_y3  p99_x1  p99_x2  p99_x3  p99_x4  p99_x5 ;
run;

/* Drop the overall and one-way summary rows (missing yr or icode) and the
   automatic _TYPE_ / _FREQ_ columns. */
data OUTLIER ;
  set OUTLIER ;
  if yr=.  or  icode=.  then delete ;
  drop _type_  _freq_ ;
run;

proc sort data=OUTLIER ; by yr icode ; run;
proc sort data=VAR1    ; by yr icode ; run;

/* VVV = VAR1 with y1-y3 and x1-x5 clamped to [p1, p99] of their cell. */
data VVV ;
  merge VAR1  OUTLIER ;
  by yr icode ;

  array _v  {8} y1  y2  y3  x1  x2  x3  x4  x5 ;
  array _lo {8} p1_y1   p1_y2   p1_y3   p1_x1   p1_x2   p1_x3   p1_x4   p1_x5 ;
  array _hi {8} p99_y1  p99_y2  p99_y3  p99_x1  p99_x2  p99_x3  p99_x4  p99_x5 ;

  do _k = 1 to dim(_v) ;
    /* SAS orders a missing value below every number. An unguarded
       "value < p1" test is therefore true for a missing value and would
       overwrite it with the 1st percentile, turning an unusable observation
       into a usable-looking one. Missing values are left missing here. */
    if not missing(_v{_k}) then do ;
      if not missing(_lo{_k}) and _v{_k} < _lo{_k} then _v{_k} = _lo{_k} ;
      if not missing(_hi{_k}) and _v{_k} > _hi{_k} then _v{_k} = _hi{_k} ;
    end ;
  end ;

  drop _k ;
run;

/* Estimate the real earnings models separately for every year-industry cell */

proc sort data=VVV ;
  by yr icode ;
run;

/* Model 1 (abnormal CFO): y1 regressed on x1, x2, x3.
   OUTEST= stores one row of parameter estimates per BY group. */
proc reg data=VVV  outest=coef1  noprint ;
  by yr icode ;
  model y1 = x1 x2 x3 ;
run;

/* Keep only the keys and the estimates, renamed so that they do not collide
   with the regressors when merged back onto the firm-level data. */
data coef1 ;
  set coef1 (keep   = yr icode intercept x1-x3
             rename = (intercept=al1  x1=be11  x2=be21  x3=be31)) ;
run;
*proc print; run;

/* Model 2 (abnormal production cost): y2 regressed on x1, x2, x3, x4,
   estimated per yr-icode BY group. */
proc reg data=VVV  outest=coef2  noprint ;
  by yr icode ;
  model y2 = x1 x2 x3 x4 ;
run;

/* Keep the keys and the estimates under model-specific names. */
data coef2 ;
  set coef2 (keep   = yr icode intercept x1-x4
             rename = (intercept=al2  x1=be12  x2=be22  x3=be32  x4=be42)) ;
run;
*proc print; run;

/* Model 3 (abnormal discretionary expense): y3 regressed on x1 and x5,
   estimated per yr-icode BY group. */
proc reg data=VVV  outest=coef3  noprint ;
  by yr icode ;
  model y3 = x1 x5 ;
run;

/* Keep the keys and the estimates under model-specific names. */
data coef3 ;
  set coef3 (keep   = yr icode intercept x1 x5
             rename = (intercept=al3  x1=be13  x5=be53)) ;
run;
*proc print; run;



/* Calculating real earnings management */

/* All four inputs must share the yr-icode sort order for the merge. */
proc sort data=VAR1  ; by yr icode ; run;
proc sort data=coef1 ; by yr icode ; run;
proc sort data=coef2 ; by yr icode ; run;
proc sort data=coef3 ; by yr icode ; run;

/* The abnormal measures are computed from VAR1 (the un-winsorized values),
   not from VVV, which served only as the estimation sample. */
data REM ;
  merge VAR1  coef1  coef2  coef3 ;
  by yr icode ;

  /* Abnormal component = actual value minus the value fitted from the
     year-industry coefficients. */
  acfo = y1 - (al1 + be11*x1 + be21*x2 + be31*x3) ;
  apc  = y2 - (al2 + be12*x1 + be22*x2 + be32*x3 + be42*x4) ;
  ade  = y3 - (al3 + be13*x1 + be53*x5) ;

  /* Aggregate measure: acfo and ade enter with a negative sign, apc with a
     positive sign. */
  rem = -acfo + apc - ade ;
run;

proc sort data=REM ;
  by code yr ;
run;

proc print data=REM ;
run;
quit ;
2 REPLIES 2
PaigeMiller
Diamond | Level 26

To provide code, include it in your message, not as an attachment, by clicking on the little running man icon and pasting your code into the window that appears. To include data, please include a portion of your data as text, not as an attachment, by following these examples and instructions.

--
Paige Miller
PaigeMiller
Diamond | Level 26

Also please tell us how the regression should go, what variables are the independent (predictor) variables and what variables are the dependent (response) variables.

--
Paige Miller

sas-innovate-wordmark-2025-midnight.png

Register Today!

Join us for SAS Innovate 2025, our biggest and most exciting global event of the year, in Orlando, FL, from May 6-9. Sign up by March 14 for just $795.


Register now!

How to Concatenate Values

Learn how to use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 2 replies
  • 459 views
  • 1 like
  • 2 in conversation