/* Extract from the Exploratory Data Analysis Code */

/*----------------------------------------------------------------------
  Purpose
    Build WORK.QUIZDATA with one row per question (QUESTION = SQ01..SQ59)
    by merging four per-question tables:
      WORK.QUESTLEVELSS  NLEVELS    (renamed from NLEVELS_1 of WORK.QUESTLEVELS)
      WORK.DATA3DPS      DURATION   (median of DQ01-DQ59, the target variable)
      WORK.DATADF1S      DISTANCE1  (median of DL01-DL59, distance function 1)
      WORK.DATADF2S      DISTANCE2  (median of DE01-DE59, distance function 2)

  Inputs read
    WORK.DATA12345C   (SQ01-SQ59)
    WORK.DATA345DDSS  (DQ01-DQ59, DL01-DL59, DE01-DE59)
    WORK.QUESTLEVELS  (TABLEVAR, NLEVELS_1, NLEVELS_2, NLEVELS_3)

  Every data set is referenced with an explicit WORK. libref so that the
  steps that write a table and the steps that read it always agree.
----------------------------------------------------------------------*/

/* Step 1: number of levels per question (task complexity).              */
/* The NLEVELS option makes PROC FREQ produce the ODS table captured     */
/* below in WORK.QUESTLEVELS_1.                                          */
PROC FREQ DATA = WORK.DATA12345C NLEVELS ;
    TABLES SQ01-SQ59 / NOCOL ;
    ODS OUTPUT NLEVELS = WORK.QUESTLEVELS_1 ;
RUN;

/* Step 2: sort the PROC FREQ output by question (TABLEVAR) for a later  */
/* merge and drop the label column.                                      */
/* NOTE(review): WORK.QUESTLEVELS_2 is not read again in this extract;   */
/* the rename step further down reads WORK.QUESTLEVELS instead. Confirm  */
/* that WORK.QUESTLEVELS is built from this table elsewhere.             */
PROC SORT DATA = WORK.QUESTLEVELS_1
          OUT  = WORK.QUESTLEVELS_2 ( DROP = TableVarLabel )
          EQUALS ;
    BY TABLEVAR ;   /* question - task number */
RUN;

/* Step 3: summary statistics per question for the target variable and   */
/* the two distance functions. Each step computes N, MIN, P25, MEDIAN,   */
/* MEAN, P75 and MAX; only MEDIAN survives the later PROC SORT DROP=.    */
/* STACKODS is requested so the ODS SUMMARY table can be captured as a   */
/* data set with a VARIABLE column, which the rename steps rely on.      */

/* 3a: duration, DQ01-DQ59 */
PROC MEANS DATA = WORK.DATA345DDSS STACKODS
           N MIN P25 MEDIAN MEAN P75 MAX MAXDEC = 2 ;
    VAR DQ01-DQ59 ;
    ODS OUTPUT SUMMARY = WORK.DATA3DP_STATS ;
RUN;

/* 3b: distance function 1, DL01-DL59 */
PROC MEANS DATA = WORK.DATA345DDSS STACKODS
           N MIN P25 MEDIAN MEAN P75 MAX MAXDEC = 2 ;
    VAR DL01-DL59 ;
    ODS OUTPUT SUMMARY = WORK.DATADF1_STATS ;
RUN;

/* 3c: distance function 2, DE01-DE59 */
PROC MEANS DATA = WORK.DATA345DDSS STACKODS
           N MIN P25 MEDIAN MEAN P75 MAX MAXDEC = 2 ;
    VAR DE01-DE59 ;
    ODS OUTPUT SUMMARY = WORK.DATADF2_STATS ;
RUN;

/* Step 4: give the four tables a common key name (QUESTION) and name    */
/* each median after what it measures.                                   */
DATA WORK.DATA3DPM_STATS ( RENAME = ( VARIABLE = QUESTION  MEDIAN = DURATION ) ) ;
    SET WORK.DATA3DP_STATS ;
RUN;

DATA WORK.DATADF1M_STATS ( RENAME = ( VARIABLE = QUESTION  MEDIAN = DISTANCE1 ) ) ;
    SET WORK.DATADF1_STATS ;
RUN;

DATA WORK.DATADF2M_STATS ( RENAME = ( VARIABLE = QUESTION  MEDIAN = DISTANCE2 ) ) ;
    SET WORK.DATADF2_STATS ;
RUN;

/* NOTE(review): WORK.QUESTLEVELS is not created anywhere in this        */
/* extract. It has to provide TABLEVAR and NLEVELS_1 - NLEVELS_3         */
/* (NLEVELS_2 and NLEVELS_3 are dropped in step 6). Verify upstream.     */
DATA WORK.QUESTLEVELSM ( RENAME = ( TABLEVAR = QUESTION  NLEVELS_1 = NLEVELS ) ) ;
    SET WORK.QUESTLEVELS ;
RUN;

/* Step 5: make the key values match across all four tables.             */
/* SUBSTR on the left of "=" overwrites the first two characters of      */
/* QUESTION in place, so DQnn / DLnn / DEnn all become SQnn.             */
/* All three steps use the same double-quoted literal.                   */
DATA WORK.DATA3DPQ_STATS ;      /* DQ01-DQ59 -> SQ01-SQ59 */
    SET WORK.DATA3DPM_STATS ;
    SUBSTR(QUESTION,1,2)="SQ";
RUN;

DATA WORK.DATADF1Q_STATS ;      /* DL01-DL59 -> SQ01-SQ59 */
    SET WORK.DATADF1M_STATS ;
    SUBSTR(QUESTION,1,2)="SQ";
RUN;

DATA WORK.DATADF2Q_STATS ;      /* DE01-DE59 -> SQ01-SQ59 */
    SET WORK.DATADF2M_STATS ;
    SUBSTR(QUESTION,1,2)="SQ";
RUN;

/* Step 6: sort every table by QUESTION (required by MERGE ... BY) and   */
/* keep only the key plus the one column each table contributes.         */
/* NOTE(review): DROP = LABEL assumes the SUMMARY tables carry a LABEL   */
/* column - confirm the analysis variables have labels.                  */
PROC SORT DATA = WORK.DATA3DPQ_STATS
          OUT  = WORK.DATA3DPS ( DROP = LABEL N MIN P25 MEAN P75 MAX )
          EQUALS ;
    BY QUESTION ;
RUN;

PROC SORT DATA = WORK.DATADF1Q_STATS
          OUT  = WORK.DATADF1S ( DROP = LABEL N MIN P25 MEAN P75 MAX )
          EQUALS ;
    BY QUESTION ;
RUN;

PROC SORT DATA = WORK.DATADF2Q_STATS
          OUT  = WORK.DATADF2S ( DROP = LABEL N MIN P25 MEAN P75 MAX )
          EQUALS ;
    BY QUESTION ;
RUN;

PROC SORT DATA = WORK.QUESTLEVELSM
          OUT  = WORK.QUESTLEVELSS ( DROP = NLEVELS_2 NLEVELS_3 )
          EQUALS ;
    BY QUESTION ;
RUN;

/* Step 7: one row per question with NLEVELS, DURATION, DISTANCE1 and    */
/* DISTANCE2 side by side.                                               */
/* NOTE(review): QUESTION originates from TABLEVAR in one input and from */
/* VARIABLE in the other three; if their lengths differ SAS will warn    */
/* about multiple lengths for the BY variable - check the log.           */
DATA WORK.QUIZDATA ;
    MERGE WORK.QUESTLEVELSS
          WORK.DATA3DPS
          WORK.DATADF1S
          WORK.DATADF2S ;
    BY QUESTION ;
RUN;

/* Non-code text carried over from the original message:
   The final required data set looks like this : [image: Inline images 1]
   ##- Please type your reply above this line. Simple formatting, no attachments. -##
   ... View more
*/