Hi, I was wondering if SAS has any automated feature for creating a regression discontinuity plot. What I want is a function that will split the running variable by a given bin width and then create a binned scatterplot. The bins should be counted from the point of discontinuity so that each bin contains either all treatment or all control observations. For example, in the plot below, X is the running variable. There is a threshold for treatment assignment at 120, represented by a red, dashed vertical line. The bin width is equal to 10. Consequently, the bins are counted to the right and to the left of the threshold (e.g. 110-120, 100-110, ... on the left and 120-130, 130-140, ... on the right). The observations are binned into these categories and then plotted as points. The y-value of each point is the mean of Y for that bin. As you can see from this example, ideally, if a treatment is effective, we should see a discontinuity at the threshold, which we do in this case. I created this plot using the code below. However, I was wondering whether there is a procedure in SAS that does the same thing. I believe there are functions in R and Stata that create similar plots. 
/*----------------------------------------------------------------------------------------*/
/* BINSCATTER_TEST                                                                        */
/*                                                                                        */
/* Draws regression-discontinuity style plots: a histogram of the running variable and,   */
/* for each outcome variable, a scatter plot of per-bin outcome means. Bin edges are      */
/* counted outward from the cutoff, so no bin straddles the cutoff.                       */
/*                                                                                        */
/* Parameters                                                                             */
/*   DATA       input data set                                                            */
/*   X          running variable (numeric)                                                */
/*   Y          one or more outcome variables, separated by blanks                        */
/*   CENTER     cutoff value; a bin edge is always placed exactly here                    */
/*   BINSIZE    bin width (must be positive)                                              */
/*   HIST       1 = draw the histogram of X                                               */
/*   BINSCATTER 1 = draw the binned scatter plot for each Y                               */
/*   X_TICK     spacing of the x-axis tick marks (must be positive)                       */
/*   PRINT      1 = print the intermediate data sets for checking                         */
/*                                                                                        */
/* Work data sets created: BINNED_IML, MID_PTS, DATA_BINNED_IML, BIN_MEANS, BINNED.       */
/* BIN_MEANS and BINNED hold the results for the last variable listed in Y.               */
/* The bin index is stored in a variable named B, so an input variable named B is         */
/* replaced in DATA_BINNED_IML.                                                           */
/* Requires SAS/IML.                                                                      */
/*----------------------------------------------------------------------------------------*/
%MACRO BINSCATTER_TEST(DATA, X, Y, CENTER = 0.5, BINSIZE =10, HIST = 1, BINSCATTER = 1, X_TICK = 50, PRINT = 0);

    /* Keep every helper macro variable local so that CALL SYMPUTX, INTO : and %DO */
    /* cannot overwrite same-named macro variables in an enclosing scope.          */
    %LOCAL FARTHEST_RNK MID_BIN MIN_CUT MAX_CUT BIN_START XMIN XMAX I Y_I;

    /* Fail early on step sizes that would make the DO() sequences invalid. */
    %IF %SYSEVALF(&BINSIZE <= 0, BOOLEAN) OR %SYSEVALF(&X_TICK <= 0, BOOLEAN) %THEN %DO;
        %PUT ERROR: BINSCATTER_TEST requires BINSIZE and X_TICK to be positive.;
        %RETURN;
    %END;

    /* Largest distance between the cutoff and either end of the observed range of X. */
    /* Fixed aliases (LO, HI) are used so that long variable names cannot overflow.   */
    PROC SQL NOPRINT;
        SELECT MAX(ABS((&CENTER) - LO), (HI - (&CENTER)))
            INTO :FARTHEST_RNK
            FROM (SELECT MIN(&X) AS LO, MAX(&X) AS HI FROM &DATA);
    QUIT;

    /* Strip the padding blanks that INTO : leaves around a numeric value. */
    %LET FARTHEST_RNK = &FARTHEST_RNK;
    %PUT &=FARTHEST_RNK;

    /* An empty value means the query failed; a period means X had no non-missing values. */
    %IF %LENGTH(&FARTHEST_RNK) = 0 %THEN %LET FARTHEST_RNK = .;
    %IF &FARTHEST_RNK = . %THEN %DO;
        %PUT ERROR: BINSCATTER_TEST could not determine the range of &X in &DATA..;
        %RETURN;
    %END;

    /* Half a bin width, evaluated once to a number instead of kept as unevaluated text. */
    %LET MID_BIN = %SYSEVALF(&BINSIZE / 2);

    PROC IML;
        /* Matrices created here: X, POS, NEG, NEG_REV, CUTS, B, MIN_CUT, MAX_CUT, MIDS */
        USE &DATA;
        READ ALL VAR {&X} INTO X;
        CLOSE &DATA;

        /* Cut points marching away from the cutoff, one extra bin of slack on each side. */
        POS = DO(&CENTER, &CENTER + &FARTHEST_RNK + &BINSIZE, &BINSIZE);
        NEG = DO(&CENTER, &CENTER - &FARTHEST_RNK - &BINSIZE, -&BINSIZE);

        /* The cutoff itself is the first element of both vectors; drop it from NEG. */
        NEG = NEG[ , 2:NCOL(NEG)];

        /* Small helper that reverses a row vector. */
        START FLIPLR(A);
            RETURN( A[ , NCOL(A):1] );
        FINISH;
        NEG_REV = FLIPLR(NEG);

        /* All cut points in ascending order. */
        CUTS = NEG_REV || POS;

        /* Bin index of every observation. Intervals are closed on the left, so a value */
        /* equal to the cutoff lands in the first bin to the right of it.               */
        B = BIN(X, CUTS);

        MIN_CUT = MIN(CUTS);
        MAX_CUT = MAX(CUTS);

        /* Midpoint of each bin, taken from the cut points so the two always agree. */
        MIDS = CUTS[ , 1:(NCOL(CUTS) - 1)] + &MID_BIN;

        /* Values needed by the graphics steps. They are computed from IML scalars, */
        /* not from macro variables that are only being created in this same step.  */
        CALL SYMPUTX("MIN_CUT", MIN_CUT);
        CALL SYMPUTX("MAX_CUT", MAX_CUT);
        CALL SYMPUTX("BIN_START", MIN_CUT - &MID_BIN);
        CALL SYMPUTX("XMIN", &X_TICK * FLOOR(MIN_CUT / &X_TICK));
        CALL SYMPUTX("XMAX", &X_TICK * CEIL(MAX_CUT / &X_TICK));

        /* One row per observation holding only the bin index. Writing the running     */
        /* variable back under the fixed name X would overwrite an input variable X.   */
        CREATE BINNED_IML VAR {B};
        APPEND;
        CLOSE BINNED_IML;

        /* Bin midpoints and cut points. CUTS has one more element than MIDS, */
        /* so the last row has a missing midpoint.                            */
        CREATE MID_PTS VAR {MIDS CUTS};
        APPEND;
        CLOSE MID_PTS;
    QUIT;

    /* Attach the bin index to the original observations, row by row. */
    DATA DATA_BINNED_IML;
        SET &DATA;
        SET BINNED_IML;
    RUN;

    /* Number the bins so the midpoints can be merged with the bin means.       */
    /* Done once, outside the loop over Y; the rows are already in order of B.  */
    DATA MID_PTS;
        SET MID_PTS;
        B = _N_;
    RUN;

    ODS GRAPHICS ON / WIDTH=6IN HEIGHT=4IN NOBORDER;

    /*-------------- PRODUCE GRAPHIC FOR THE X VARIABLE ------------*/
    /*--- HISTOGRAM ---*/
    %IF &HIST = 1 %THEN %DO;
        PROC SGPLOT DATA = &DATA;
            HISTOGRAM &X / BINSTART = &BIN_START BINWIDTH = &BINSIZE SCALE = COUNT;
            REFLINE &CENTER / AXIS = X LINEATTRS=(COLOR = RED PATTERN = DASH);
            /* The two XAXIS statements of the original are merged into one. */
            XAXIS MIN = &MIN_CUT MAX = &MAX_CUT VALUES=(&XMIN TO &XMAX BY &X_TICK);
        RUN;
    %END;

    /*-------------- PRODUCE GRAPHICS FOR EACH Y VARIABLE ------------*/
    %DO I = 1 %TO %SYSFUNC(COUNTW(&Y));
        %LET Y_I = %SCAN(&Y, &I);

        /*--- Mean of the outcome within each bin ---*/
        PROC MEANS DATA = DATA_BINNED_IML NOPRINT;
            CLASS B;
            VAR &Y_I;
            OUTPUT OUT = BIN_MEANS(WHERE=(_TYPE_=1)) MEAN=;
        RUN;

        PROC SORT DATA = BIN_MEANS;
            BY B;
        RUN;

        /*--- Merge with MID_PTS so that bins without observations are kept ---*/
        DATA BINNED(RENAME=(MIDS = &X));
            MERGE BIN_MEANS(RENAME=(_FREQ_=N) DROP=_TYPE_) MID_PTS;
            BY B;
        RUN;

        /*--- BINNED SCATTER PLOT ---*/
        %IF &BINSCATTER = 1 %THEN %DO;
            PROC SGPLOT DATA = BINNED;
                SCATTER X = &X Y = &Y_I;
                REFLINE &CENTER / AXIS = X LINEATTRS=(COLOR = RED PATTERN = DASH);
                XAXIS VALUES=(&XMIN TO &XMAX BY &X_TICK);
            RUN;
        %END;
    %END;

    ODS GRAPHICS OFF;

    /* CHECK: MAKE SURE BIN CATEGORIES ARE CORRECT */
    %IF &PRINT = 1 %THEN %DO;
        PROC PRINT DATA = MID_PTS;
        RUN;

        PROC SORT DATA = DATA_BINNED_IML;
            BY B;
        RUN;

        /* List the variables the caller named, not names from the demo data set. */
        PROC PRINT DATA = DATA_BINNED_IML;
            BY B;
            VAR B &X &Y;
        RUN;

        PROC PRINT DATA = BIN_MEANS;
        RUN;

        PROC PRINT DATA = BINNED;
        RUN;
    %END;

%MEND BINSCATTER_TEST;

/*----------------------------------------------------------------------------------------*/
/*-------------------------------------- FAKE DATA ---------------------------------------*/
/*----------------------------------------------------------------------------------------*/
/* 1000 simulated students. Students at or above the cutoff of 120 are treated, and the   */
/* treatment adds 40 to the outcome.                                                      */
DATA FAKE;
    /* Fixed seed so the demonstration produces the same data on every run. */
    CALL STREAMINIT(12345);
    DO STUDENT_ID = 1 TO 1000;
        X = RAND("NORMAL", 120, 20);
        IF X GE 120 THEN DO;
            TREAT = 1;
            Y = X + 40 + RAND("NORMAL", 0, 5);
        END;
        ELSE DO;
            TREAT = 0;
            Y = X + RAND("NORMAL", 0, 5);
        END;
        OUTPUT;
    END;
RUN;

%BINSCATTER_TEST(FAKE, X, Y, CENTER=120, BINSIZE = 10, X_TICK = 10, PRINT = 0);
... View more