I am trying to understand some code written in SAS in order to replicate the results of a paper. It mainly involves regressions, and I plan to do the replication in Stata or Python. Below are the parts that I can't understand.
length age20 age30 age40 age50 workly 3;
/* Outcome variables defined only on subsamples of weeks worked (wkswork).
   Each one copies (or flips) the variable working - presumably a 0/1
   indicator, confirm - on the rows named in its IF condition and is set
   to missing (.) on all other rows. */
/* lfin1: equals working when wkswork is 0, missing otherwise. */
if (wkswork=0) then lfin1=working;
else lfin1=.;
/* lfin2: equals working when wkswork is below 50, missing otherwise.
   SAS sorts a missing numeric below every number, so a row with missing
   wkswork also satisfies wkswork<50. */
if (wkswork<50) then lfin2=working;
else lfin2=.;
/* lfout1: equals 1-working when wkswork is 50 or more, missing otherwise.
   The operator => is an alternate spelling of >= in DATA step expressions. */
if (wkswork=>50) then lfout1=(1-working);
else lfout1=.;
/* changer is set to missing when wkswork is 0. There is no ELSE branch,
   so on other rows whatever value changer already holds is left alone. */
if (wkswork=0) then changer=.;
/* LENGTH ... 3 stores these numeric variables in 3 bytes each. It affects
   storage size only, not how the values are used in later arithmetic. */
length agegrp age20 age30 age40 age50 educgrp region 3;
/* Linear and quadratic time trend measured from 87 (year appears to be a
   two-digit year - confirm against the input data). */
trend=(year-87);
trend2=trend**2;
/* Trend interacted with the disability variable disabl1. */
dis_trend= trend*disabl1;
/* A comparison in SAS returns 1 (true) or 0 (false), and 94<=year<=97 is
   evaluated as (94<=year) and (year<=97). So dyr_9497 equals disabl1 in
   years 94 through 97 and 0 in other years. When porting, write the range
   test explicitly if the target language does not chain comparisons. */
dyr_9497=(94<=year<=97)*disabl1;
/* Macro code(var,num): a text template. Each call is replaced, before the
   DATA step runs, by one assignment statement of the form
       dis_<var><num> = disabl1*(<var>=<num>);
   For example %code(region,3) produces
       dis_region3 = disabl1*(region=3);
   The comparison yields 1 or 0, so the new variable equals disabl1 on rows
   where <var> equals <num> and 0 on rows where it holds another value. */
%macro code(var,num);
dis_&var.&num.= disabl1*(&var.=&num.);
%mend;
/* disabl1 interacted with each racegrp value 1-3. */
%code(racegrp,1); %code(racegrp,2); %code(racegrp,3);
/* disabl1 interacted with each region value 1-9. */
%code(region,1); %code(region,2); %code(region,3);
%code(region,4); %code(region,5); %code(region,6);
%code(region,7); %code(region,8); %code(region,9);
/* disabl1 interacted with each agegrp value 20, 30, 40, 50. */
%code(agegrp,20); %code(agegrp,30); %code(agegrp,40); %code(agegrp,50);
/* disabl1 interacted with each educgrp value 1-3. */
%code(educgrp,1); %code(educgrp,2); %code(educgrp,3);
/* Macro code2(var,num): a text template that expands to
       <var>_d<num> = (<var>=<num>);
   which is a 1/0 dummy for <var> equal to <num>. For example
   %code2(year,89) produces  year_d89 = (year=89);  */
%macro code2(var,num);
&var._d&num.= (&var.=&num.);
%mend;
/* Year dummies year_d89 through year_d97. No dummy is created here for
   any year before 89. */
%code2(year,89); %code2(year,90); %code2(year,91); %code2(year,92);
%code2(year,93); %code2(year,94); %code2(year,95); %code2(year,96);
%code2(year,97);
/* Store these numeric variables in 3 bytes each. dis_yr89-dis_yr97 is a
   numbered range list (dis_yr89, dis_yr90, ..., dis_yr97).
   NOTE(review): the statements that assign dis_yr89-dis_yr97 are not part
   of this excerpt; presumably they are disabl1 times the matching year
   dummy - confirm against the full program. */
length dis_yr89-dis_yr97 trend trend2 3;
* ID VARS FOR LATER;
/* classid packs the four group codes into a single number, for example
   agegrp=20, racegrp=1, educgrp=2, region=3 gives 20123. */
classid= 1000*agegrp+100*racegrp+10*educgrp+region;
/* The || operator concatenates character strings; a numeric operand is
   converted to text automatically. sampsex therefore identifies a
   sample-by-sex group, and classid2 identifies a classid-by-sample-by-sex
   cell. classid2 is the BY key for the cell-level regressions. */
sampsex= sample||sex;
classid2= classid||sample||sex;
/* Constant 1 on every row; it is the analysis variable whose sum of
   weights is taken in the PROC SUMMARY steps. */
count=1;
**********************************;
* COLUMN 4;
* SORT BY BY-GROUP;
**********************************;
/* Order ONE by the cell key classid2 so the BY-group procedures that
   follow can process one cell at a time. The WHERE statement restricts the
   rows PROC SORT reads to sex=1, plus sex=2 rows whose sample is 'young'.
   NOTE(review): no OUT= data set is named, so the sorted result goes back
   to ONE. Confirm whether the rows excluded by WHERE are thereby removed
   from ONE for every later step; a replication should apply the same
   filter before the regressions either way. */
proc sort data=one;
by classid2;
where sex=1 | (sex=2 & sample='young');
run;
/* Cell-level regressions: one weighted least-squares fit of wkswork for
   every value of classid2, using fnlwgt2 as the weight. Regressors are the
   year dummies year_d89 ... year_d96, disabl1, and dis_yr92 ... dis_yr97.
   NOPRINT suppresses printed output. OUTEST=ESTS saves one row of
   coefficients per cell (_TYPE_='PARMS'), and OUTSEB adds a matching row
   of standard errors (_TYPE_='SEB'). */
proc reg data=one noprint outest=ests outseb;
  by classid2;
  weight fnlwgt2;
  model wkswork = year_d89-year_d96 disabl1 dis_yr92-dis_yr97;
run;
/* Split the PROC REG output by row type: coefficient rows go to COEFFS,
   standard-error rows go to SES. Rows of any other type are written to
   neither data set. Both outputs keep the same columns. */
data coeffs (keep=_TYPE_ classid2 sampsex disabl1 dis_yr92-dis_yr97)
     ses    (keep=_TYPE_ classid2 sampsex disabl1 dis_yr92-dis_yr97);
  set ests;
  select (_TYPE_);
    when ('PARMS') output coeffs;
    when ('SEB')   output ses;
    otherwise;
  end;
run;
/* Numerator of the cell shares: for each classid-by-sampsex group, DISWT
   is the sum of the weights fnlwgt2 over rows with disabl1=1 in years 88
   through 91 (SUMWGT of the constant COUNT). The output is one row per BY
   group in WTDS. BY processing expects ONE to already be ordered by
   classid and sampsex - presumably satisfied by the earlier sort on
   classid2, confirm. */
proc summary data=one;
  where (88<=year<=91) & disabl1=1;
  by classid sampsex;
  var count;
  weight fnlwgt2;
  output out=wtds sumwgt=diswt;
run;
/* Denominator of the cell shares: for each sampsex group, TOTDISWT is the
   sum of the weights fnlwgt2 over rows with disabl1=1 in years 88 through
   91. ONE is re-sorted by sampsex first because the BY statement needs the
   data in that order. Output: one row per sampsex in TOTALS. */
proc sort data=one; by sampsex; run;
proc summary data=one;
weight fnlwgt2;
where disabl1=1 & (88<=year<=91);
by sampsex;
var count;
/* The OUTPUT statement must end with its own semicolon. Without it the
   following RUN is read as part of the OUTPUT statement. */
output out=totals sumwgt=totdiswt;
run;
/* Attach the sampsex total to every cell row. TOTALS has one row per
   sampsex and WTDS has one row per classid-by-sampsex, so this is a
   one-to-many match on sampsex. */
proc sort data=wtds; by sampsex;
proc sort data=totals; by sampsex; run;
data wtds2;
merge wtds totals;
by sampsex; run;
/* The rescaling is done in a separate pass on purpose. In a one-to-many
   MERGE the value read from the one-row side (totdiswt) is carried forward
   across the rows of the BY group instead of being reread, so dividing it
   by 100 inside the merge step would compound on every row after the
   first. */
data wtds2;
set wtds2;
/* A cell with no weight recorded is treated as weight 0. */
if diswt=. then diswt=0;
/* Both quantities are divided by 100; the factor cancels in the ratio. */
diswt=diswt/100;
totdiswt=totdiswt/100;
/* fracdis: this cell's share of the total disabled weight in its sampsex
   group. It is the weight used later to average the cell estimates. */
fracdis=diswt/totdiswt;
drop _FREQ_ _TYPE_;
/* Rebuild the cell key so it can be matched with classid2 in the
   regression output. */
classid2=classid||sampsex;
run;
/* Combine, cell by cell, the share weights (WTDS2), the coefficient
   estimates (COEFFS) and the standard errors (SES). All three inputs are
   first ordered by the merge key classid2. */
proc sort data=wtds2;  by classid2; run;
proc sort data=coeffs; by classid2; run;
proc sort data=ses;    by classid2; run;

data makeests;
  /* The standard-error columns get an _s suffix so they can sit next to
     the coefficient columns of the same name. */
  merge wtds2  (in=in_wt)
        coeffs (in=in_coef)
        ses    (in=in_se
                rename=(disabl1=disabl1_s
                        dis_yr92=dis_yr92_s dis_yr93=dis_yr93_s
                        dis_yr94=dis_yr94_s dis_yr95=dis_yr95_s
                        dis_yr96=dis_yr96_s dis_yr97=dis_yr97_s));
  by classid2;
  /* Keep only cells present in all three inputs. */
  if in_wt and in_coef and in_se;
  /* Turn each standard error into a variance, which is the quantity the
     later weighting step sums. */
  array se_cols {*} disabl1_s dis_yr92_s dis_yr93_s dis_yr94_s
                    dis_yr95_s dis_yr96_s dis_yr97_s;
  do _k = 1 to dim(se_cols);
    se_cols{_k} = se_cols{_k}**2;
  end;
  drop _k;
run;
/* Collapse the cell-level results to one row per sampsex. Within each
   sampsex group, and for each of the seven coefficients x, the step keeps
   three running sums over the cells that pass the flag test:
       x_c  = sum of fracdis * coefficient
       x_se = sum of fracdis**2 * variance   (x_s was squared earlier)
       x_wt = sum of fracdis
   The sums are written out on the last row of each group. A later step
   divides by x_wt to turn them into a weighted average and its standard
   error. */
proc sort data=makeests;
by sampsex;
run;
data finalests (keep=disabl1_c dis_yr92_c dis_yr93_c dis_yr94_c dis_yr95_c
dis_yr96_c dis_yr97_c disabl1_se dis_yr92_se dis_yr93_se dis_yr94_se dis_yr95_se
dis_yr96_se dis_yr97_se disabl1_wt dis_yr92_wt dis_yr93_wt dis_yr94_wt dis_yr95_wt
dis_yr96_wt dis_yr97_wt sampsex);
set makeests;
by sampsex;
/* RETAIN keeps the accumulators from one row to the next instead of
   resetting them to missing at the top of each iteration. */
retain disabl1_c 0 dis_yr92_c 0 dis_yr93_c 0 dis_yr94_c 0 dis_yr95_c 0
dis_yr96_c 0 dis_yr97_c 0
disabl1_se 0 dis_yr92_se 0 dis_yr93_se 0 dis_yr94_se 0 dis_yr95_se 0
dis_yr96_se 0 dis_yr97_se 0
disabl1_wt 0 dis_yr92_wt 0 dis_yr93_wt 0 dis_yr94_wt 0 dis_yr95_wt 0
dis_yr96_wt 0 dis_yr97_wt 0;
/* correct is 1 only when all seven variances are nonmissing for this
   cell, otherwise 0. */
correct= (disabl1_s ne . & dis_yr92_s ne . & dis_yr93_s ne . &
dis_yr94_s ne . & dis_yr95_s ne . & dis_yr96_s ne . &
dis_yr97_s ne .);
/* First row of a sampsex group: reset every accumulator to 0, then add
   this row's contribution. */
if first.sampsex then do;
disabl1_c=0; dis_yr92_c=0; dis_yr93_c=0; dis_yr94_c=0; dis_yr95_c=0;
dis_yr96_c=0; dis_yr97_c=0;
disabl1_se=0; dis_yr92_se=0; dis_yr93_se=0; dis_yr94_se=0; dis_yr95_se=0;
dis_yr96_se=0; dis_yr97_se=0;
disabl1_wt=0; dis_yr92_wt=0; dis_yr93_wt=0; dis_yr94_wt=0; dis_yr95_wt=0;
dis_yr96_wt=0; dis_yr97_wt=0;
/* Macro rep(var) is only a text template. The macro definition itself
   produces no DATA step statements, so its position inside this DO block
   has no effect on the logic; only the rep calls below it emit code.
   Each call sets oldway to 1 when this coefficient's own variance is
   nonmissing (0 otherwise) and then, when the flag test holds, adds this
   cell's contribution to the three accumulators for var.
   NOTE(review): the macro variable flag is not defined anywhere in this
   excerpt. It must be set elsewhere, presumably to one of the two
   indicators computed in this step (correct or oldway) - confirm against
   the full program before replicating. */
%macro rep(var);
oldway=0; if &var._s ne . then oldway=1;
if &flag=1 then &var._c= &var._c+fracdis*&var.;
if &flag=1 then &var._se= &var._se+(fracdis**2)*&var._s;
if &flag=1 then &var._wt= &var._wt+fracdis;
%mend;
%rep(disabl1); %rep(dis_yr92); %rep(dis_yr93); %rep(dis_yr94);
%rep(dis_yr95); %rep(dis_yr96); %rep(dis_yr97);
end;
/* Every other row of the group: same accumulation, without the reset. */
else do;
%rep(disabl1); %rep(dis_yr92); %rep(dis_yr93); %rep(dis_yr94);
%rep(dis_yr95); %rep(dis_yr96); %rep(dis_yr97);
end;
/* Write a single row per sampsex, holding the completed sums. */
if last.sampsex then output;
run;
/* Turn the running sums into the final estimates, one row per sampsex.
   FINALESTS is read and then overwritten. For each coefficient x:
       x_c  = x_c / x_wt                      weighted average coefficient
       x_se = square root of x_se / x_wt**2   its standard error
   The x_wt columns are not in the KEEP= list, so they are dropped from the
   output. */
data finalests (keep=disabl1_c dis_yr92_c dis_yr93_c dis_yr94_c dis_yr95_c
dis_yr96_c dis_yr97_c disabl1_se dis_yr92_se dis_yr93_se
dis_yr94_se dis_yr95_se dis_yr96_se dis_yr97_se sampsex);
set finalests;
test='correct';
/* Macro runit(var) is a text template that applies the two formulas above
   to one coefficient, inside an IF that compares test with the macro
   variable marker.
   NOTE(review): marker is not defined anywhere in this excerpt. It must be
   set elsewhere; the rescaling only runs when it resolves to a value equal
   to test - confirm against the full program. */
%macro runit(var);
if test=&marker then do;
&var._c=&var._c/&var._wt;
&var._se=(&var._se/(&var._wt**2))**.5;
end;
%mend;
%runit(disabl1); %runit(dis_yr92); %runit(dis_yr93); %runit(dis_yr94);
%runit(dis_yr95); %runit(dis_yr96); %runit(dis_yr97);
run;
/* List the final estimates, one row per sampsex. */
proc print data=finalests;
run;
/* Sample sizes: number of rows in each sampsex group, counting only rows
   with a positive weight fnlwgt2. */
proc freq data=one;
  where fnlwgt2>0;
  tables sampsex;
run;
I am sorry this is so long. The original is here: http://economics.mit.edu/files/846 (I only selected the parts I can't figure out). Any help will be appreciated. Thanks!
... View more