Re: Translate a SAS code

R_Moura · Posted 12-03-2016 03:22 PM

I am trying to understand some code written in SAS so that I can replicate the results of a paper. It basically involves regressions. I will use Stata or Python for the replication. Below are the parts that I can't understand.

/* Fragment of a DATA step (its DATA/SET statements are not shown here).
   Builds labour-force indicators, a time trend and disability interactions. */

/* LENGTH without a $ declares numeric variables; 3 is the storage length in bytes. */
length age20 age30 age40 age50 workly 3;

/* lfin1 copies working only when wkswork equals 0; otherwise it is set to missing (.). */
if (wkswork=0) then lfin1=working;
  else lfin1=.;
/* lfin2 copies working when wkswork < 50.
   NOTE(review): SAS sorts a missing numeric below every number, so a missing
   wkswork also satisfies wkswork<50 - keep that in mind when translating. */
if (wkswork<50) then lfin2=working;
  else lfin2=.;
/* "=>" is read by SAS as ">=" in DATA step expressions (legacy spelling - confirm
   against the SAS operator documentation). lfout1 is 1-working when wkswork >= 50. */
if (wkswork=>50) then lfout1=(1-working);
  else lfout1=.;
/* No ELSE branch: changer is only blanked out when wkswork is 0 and is otherwise
   left at whatever value it already has. */
if (wkswork=0) then changer=.;

length agegrp age20 age30 age40 age50 educgrp region 3;

/* Linear trend measured from year 87, and its square (** is exponentiation). */
trend=(year-87);
trend2=trend**2;

/* Interaction of the trend with the disabl1 variable. */
dis_trend= trend*disabl1;

/* A comparison evaluates to 1 (true) or 0 (false). The chained form 94<=year<=97
   means (94<=year) AND (year<=97), so dyr_9497 equals disabl1 for years 94-97
   and 0 in other years. */
dyr_9497=(94<=year<=97)*disabl1;

/* %code(var,num) generates one assignment statement:
       dis_<var><num> = (<var> = <num>) * disabl1
   The comparison yields 1 or 0, so the new variable equals disabl1 when
   <var> equals <num> and 0 otherwise. */
%macro code(var,num);
    dis_&var.&num. = (&var. = &num.) * disabl1;
%mend code;

/* racegrp levels 1-3 */
%code(racegrp,1);
%code(racegrp,2);
%code(racegrp,3);

/* region levels 1-9 */
%code(region,1);
%code(region,2);
%code(region,3);
%code(region,4);
%code(region,5);
%code(region,6);
%code(region,7);
%code(region,8);
%code(region,9);

/* agegrp levels 20, 30, 40, 50 */
%code(agegrp,20);
%code(agegrp,30);
%code(agegrp,40);
%code(agegrp,50);

/* educgrp levels 1-3 */
%code(educgrp,1);
%code(educgrp,2);
%code(educgrp,3);

/* %code2(var,num) generates one assignment statement:
       <var>_d<num> = (<var> = <num>)
   i.e. a 0/1 indicator that <var> equals <num>. */
%macro code2(var,num);
    &var._d&num. = (&var. = &num.);
%mend code2;

/* Indicators year_d89 ... year_d97 */
%code2(year,89);
%code2(year,90);
%code2(year,91);
%code2(year,92);
%code2(year,93);
%code2(year,94);
%code2(year,95);
%code2(year,96);
%code2(year,97);

/* dis_yr89-dis_yr97 is a numbered range list (dis_yr89, dis_yr90, ..., dis_yr97).
   All listed variables are numeric with a 3-byte storage length.
   NOTE(review): the statements that assign dis_yr89..dis_yr97 are not part of this
   excerpt - presumably they are created elsewhere in the original program; confirm. */
length dis_yr89-dis_yr97 trend trend2 3;

* ID VARS FOR LATER;
/* classid packs the four group codes into a single number
   (agegrp*1000 + racegrp*100 + educgrp*10 + region). */
classid= 1000*agegrp+100*racegrp+10*educgrp+region;
/* || is string concatenation. sex is compared with numbers elsewhere in this program
   (sex=1), so it is numeric and SAS converts it to text automatically here. */
sampsex= sample||sex;
/* classid is numeric and is likewise converted to text before concatenation. */
classid2= classid||sample||sex;
/* Constant 1 on every record; used later as the analysis variable in PROC SUMMARY. */
count=1;    

**********************************;
* COLUMN 4;
* SORT BY BY-GROUP;
**********************************;
/* Sort ONE by classid2 so the BY-group regression that follows can use it.
   There is no OUT= option, so the sorted and filtered result replaces ONE:
   records that fail the WHERE condition are no longer in ONE afterwards. */
proc sort data=one;
  /* Keep every sex=1 record, and sex=2 records only from the 'young' sample. */
  where sex = 1 or (sex = 2 and sample = 'young');
  by classid2;
run;

/* One weighted regression of wkswork per classid2 group.
   OUTEST=ests stores the parameter estimates, OUTSEB adds their standard errors
   to the same dataset, and NOPRINT suppresses the printed output. */
proc reg data=one outest=ests outseb noprint;
  by classid2;
  weight fnlwgt2;
  model wkswork = year_d89-year_d96
                  disabl1
                  dis_yr92-dis_yr97;
run;

* MAKE TWO DATASETS -- ONE WITH STANDARD ERRORS AND ONE WITH COEFF ESTS;
/* Route each row of ESTS by its _TYPE_ value: 'PARMS' rows (coefficients) go to
   COEFFS, 'SEB' rows (standard errors) go to SES. Any other row type is written
   to neither dataset. */
data coeffs (keep=_TYPE_ classid2 sampsex disabl1 dis_yr92-dis_yr97)
     ses    (keep=_TYPE_ classid2 sampsex disabl1 dis_yr92-dis_yr97);
  set ests;
  select (_TYPE_);
    when ('PARMS') output coeffs;
    when ('SEB')   output ses;
    otherwise;
  end;
run;

* GET SHARE OF DISABLED WHO LIVE IN EACH CLASS;
/* For records with disabl1=1 in years 88-91, sum the weights fnlwgt2 within each
   classid x sampsex cell. SUMWGT= stores that sum of weights as diswt in WTDS. */
proc summary data=one;
  where disabl1 = 1 and (88 <= year <= 91);
  by classid sampsex;
  weight fnlwgt2;
  var count;
  output out=wtds sumwgt=diswt;
run;

/* Re-sort ONE by sampsex so the BY statement in the PROC SUMMARY below is valid. */
proc sort data=one; by sampsex; run;

/* For records with disabl1=1 in years 88-91, sum the weights fnlwgt2 within each
   sampsex group. SUMWGT= stores that sum of weights as totdiswt in TOTALS. */
proc summary data=one; 
weight fnlwgt2;
where disabl1=1 & (88<=year<=91);
by sampsex;
var count;
/* The terminating semicolon was missing here, which made the following RUN part
   of the OUTPUT statement instead of ending the step. */
output out=totals sumwgt=totdiswt;
run;

/* Put both summary datasets in sampsex order for the BY-merge that follows. */
proc sort data=wtds;
  by sampsex;
run;

proc sort data=totals;
  by sampsex;
run;

/* Step 1: attach the sampsex-level total (totdiswt, from TOTALS) to every
   classid x sampsex row of WTDS. */
data wtds2;
  merge wtds totals;
  by sampsex;
run;

/* Step 2: rescale the weights and compute each class's share.
   This is intentionally a separate step from the merge: values read by MERGE are
   retained across the rows of a BY group, so rescaling totdiswt inside the merge
   step would divide it by 100 again for each row of a sampsex group. */
data wtds2;
  set wtds2;
  drop _FREQ_ _TYPE_;

  if diswt = . then diswt = 0;

  diswt    = diswt / 100;
  totdiswt = totdiswt / 100;
  fracdis  = diswt / totdiswt;      /* class share of the sampsex total */

  classid2 = classid||sampsex;      /* rebuild the merge key used by the regression output */
run;

* MERGE ONTO REGRESSION AND STANDARD ERROR RESULTS;
/* All three inputs of the merge below must be ordered by classid2. */
proc sort data=wtds2;
  by classid2;
run;

proc sort data=coeffs;
  by classid2;
run;

proc sort data=ses;
  by classid2;
run;

/* Combine, per classid2, the class share (WTDS2), the coefficients (COEFFS) and
   the standard errors (SES). The standard-error columns are renamed with an _s
   suffix so they do not overwrite the coefficient columns of the same name. */
data makeests;
  merge wtds2  (in=a)
        coeffs (in=b)
        ses    (in=c rename=(disabl1=disabl1_s
                             dis_yr92=dis_yr92_s
                             dis_yr93=dis_yr93_s
                             dis_yr94=dis_yr94_s
                             dis_yr95=dis_yr95_s
                             dis_yr96=dis_yr96_s
                             dis_yr97=dis_yr97_s));
  by classid2;

  /* Keep only classes present in all three inputs. */
  if not (a and b and c) then delete;

  * SQUARE UP TO VARIANCE TO DO WEIGHTING;
  array _se_cols {*} disabl1_s dis_yr92_s dis_yr93_s dis_yr94_s
                     dis_yr95_s dis_yr96_s dis_yr97_s;
  do _k = 1 to dim(_se_cols);
    _se_cols{_k} = _se_cols{_k}**2;
  end;
  drop _k;
run;

/* Order by sampsex so FIRST.sampsex / LAST.sampsex can be used in the next step. */
proc sort data=makeests; by sampsex; run;

/* Collapse MAKEESTS to one row per sampsex. For each of the seven coefficients the
   step keeps three running sums in RETAINed variables:
     <var>_c  : sum of fracdis * coefficient
     <var>_se : sum of fracdis**2 * squared standard error
     <var>_wt : sum of fracdis
   A row is written only on the last record of each sampsex group. */
data finalests (keep=disabl1_c dis_yr92_c dis_yr93_c dis_yr94_c dis_yr95_c 
        dis_yr96_c dis_yr97_c disabl1_se dis_yr92_se dis_yr93_se dis_yr94_se  dis_yr95_se 
        dis_yr96_se dis_yr97_se disabl1_wt dis_yr92_wt dis_yr93_wt dis_yr94_wt dis_yr95_wt 
        dis_yr96_wt dis_yr97_wt sampsex);
set makeests;
by sampsex;
/* RETAIN keeps these values from one record to the next (initial value 0) instead
   of resetting them to missing at the start of each iteration. */
retain disabl1_c 0 dis_yr92_c 0 dis_yr93_c 0 dis_yr94_c 0 dis_yr95_c 0 
        dis_yr96_c 0 dis_yr97_c 0
    disabl1_se 0 dis_yr92_se 0 dis_yr93_se 0 dis_yr94_se 0 dis_yr95_se 0 
        dis_yr96_se 0 dis_yr97_se 0
    disabl1_wt 0 dis_yr92_wt 0 dis_yr93_wt 0 dis_yr94_wt 0 dis_yr95_wt 0 
        dis_yr96_wt 0 dis_yr97_wt 0;

/* correct is 1 only when all seven squared standard errors are non-missing, else 0.
   NOTE(review): correct is never referenced by name in the visible code; presumably
   the macro variable flag used inside the rep macro is meant to resolve to it - confirm. */
correct= (disabl1_s ne . & dis_yr92_s ne . & dis_yr93_s ne . & 
          dis_yr94_s ne . & dis_yr95_s ne . & dis_yr96_s ne . & 
          dis_yr97_s ne .);

/* First record of a sampsex group: reset all running sums, then add this record. */
if first.sampsex then do;
disabl1_c=0; dis_yr92_c=0; dis_yr93_c=0; dis_yr94_c=0; dis_yr95_c=0; 
    dis_yr96_c=0; dis_yr97_c=0;
disabl1_se=0; dis_yr92_se=0; dis_yr93_se=0; dis_yr94_se=0; dis_yr95_se=0; 
    dis_yr96_se=0; dis_yr97_se=0;
disabl1_wt=0; dis_yr92_wt=0; dis_yr93_wt=0; dis_yr94_wt=0; dis_yr95_wt=0; 
    dis_yr96_wt=0; dis_yr97_wt=0;
/* The macro definition is handled by the macro processor, not by the DATA step, so
   defining it inside this DO block does not limit where it can be called: the ELSE
   branch below uses the same macro. */
%macro rep(var);
    /* oldway flags whether this coefficient has a non-missing squared standard error.
       It is not used by the conditions below. */
    oldway=0; if &var._s ne . then oldway=1;  
    /* NOTE(review): the macro variable flag is not defined anywhere in the visible
       code - it must be set elsewhere in the original program; confirm. When it
       equals 1, the record is added to the three running sums for this coefficient. */
     if &flag=1 then &var._c= &var._c+fracdis*&var.;
     if &flag=1 then &var._se= &var._se+(fracdis**2)*&var._s;
     if &flag=1 then &var._wt= &var._wt+fracdis;
%mend;
%rep(disabl1);  %rep(dis_yr92); %rep(dis_yr93); %rep(dis_yr94);
%rep(dis_yr95); %rep(dis_yr96); %rep(dis_yr97);
end;

/* Every later record of the group: just add to the running sums. */
else do;
%rep(disabl1);  %rep(dis_yr92); %rep(dis_yr93); %rep(dis_yr94);
%rep(dis_yr95); %rep(dis_yr96); %rep(dis_yr97);
end;

/* Emit the accumulated sums once per sampsex group. */
if last.sampsex then output;
run;

/* Turn the running sums into final estimates, overwriting FINALESTS. The KEEP= list
   no longer contains the _wt columns, so they are dropped from the result. */
data finalests (keep=disabl1_c dis_yr92_c dis_yr93_c dis_yr94_c dis_yr95_c 
           dis_yr96_c dis_yr97_c disabl1_se dis_yr92_se dis_yr93_se 
           dis_yr94_se dis_yr95_se dis_yr96_se dis_yr97_se sampsex);
set finalests;
/* test is a constant string on every row; it is only compared with the macro
   variable marker below. */
test='correct';
%macro runit(var);
    /* NOTE(review): the macro variable marker is not defined in the visible code;
       presumably it resolves to a quoted string such as 'correct' - confirm. */
    if test=&marker then do;
    /* Weighted average: sum(fracdis*coef) / sum(fracdis). */
    &var._c=&var._c/&var._wt;
    /* Standard error: square root of sum(fracdis**2 * variance) / sum(fracdis)**2. */
    &var._se=(&var._se/(&var._wt**2))**.5;
    end;
%mend;
%runit(disabl1);    %runit(dis_yr92); %runit(dis_yr93); %runit(dis_yr94);
%runit(dis_yr95); %runit(dis_yr96); %runit(dis_yr97);
run;

/* Print the final estimates (one row per sampsex). */
proc print data=finalests;
run;

* LOOK AT SAMPLE SIZES;
/* Record counts per sampsex, restricted to records with a positive weight. */
proc freq data=one;
  where fnlwgt2 > 0;
  tables sampsex;
run;

I am sorry this is so long. The original is here: http://economics.mit.edu/files/846. I only selected the parts I can't figure out.

ANY help will be appreciated.

Thanks!

Reeza · Posted 12-03-2016 08:22 PM

This isn't a good question. It's too broad and requires way too much work (IMO) from a volunteer.

Check the documentation and tutorials. The first SAS programming course is free.

Converting code while knowing only one of the languages is not easy. Sometimes it's easier to implement from the methodology or algorithm if that's specified in the paper and compare the end results to make sure your results are the same. If you try to do a one to one conversion it ends up being inefficient anyways.

Is there a specific reason for the conversion? If you need to run the model why not use SAS?

R_Moura · Posted 12-03-2016 08:33 PM

Yes, you're right, the question is too broad. I will try to work through it
piece by piece.

As for the reason you asked about: my classes use R or Stata, and it would
take me too long to learn SAS in the meantime. I thought you would find this
code easy enough to explain to me, in a few lines, what is going on there.

Thanks,

##- Please type your reply above this line. Simple formatting, no
attachments. -##

Reeza · Posted 12-03-2016 09:41 PM

In the most generic of terms: it creates some variables, runs a regression model, and stores the results.

I don't know that I'd consider this code efficient either :(.

Good Luck!

R_Moura · Posted 12-04-2016 06:32 AM

Thank you!

Translate a SAS code