/*---------------------------------------------------------------------------
  BRFSS 2023 analysis data set preparation (SAS).

  Note from the original poster, kept for context:
    So what you suggested was working; however, they switched the dataset and
    re-sent me the code. I got through most of it and the same error started
    again. See the large space in the code.

  Flow of this program:
    1. Read the core BRFSS 2023 file, keeping only the analysis variables.
    2. Inspect missingness, then drop records with missing / refused /
       do-not-know answers.
    3. Define ALL user formats up front, so no FORMAT statement can reference
       a format that does not exist yet.
    4. Derive Region_2 and the collapsed (_new) variables, then label and
       format them.
    5. Save the finished data set to a permanent, writable library.
---------------------------------------------------------------------------*/

/* The pasted code began with "name PROJECT ...", which is not a SAS statement;
   it is restored here as LIBNAME (the most likely original). The RUN that
   followed was dropped: LIBNAME is a global statement and needs no RUN. */
libname PROJECT "/home/u61481478/my_shared_file_links/u59974645/PROJECT_DATA/";

/* only keeping these variables from the original dataset */
DATA work.BRFSS23;
    SET "/home/u61481478/my_shared_file_links/u59974645/PROJECT_DATA/brfss2023core.sas7bdat"
        (KEEP= _AGEG5YR _STATE SEXVAR GENHLTH _HLTHPL1 EXERANY2 ADDEPEV3 CVDINFR4
               MARITAL DIABETE4 EDUCA EMPLOY1 INCOME3 _BMI5CAT DIFFWALK DRNK3GE5
               PREDIAB2 _IMPRACE _MENT14D _PHYS14D);
RUN;

proc contents data=work.BRFSS23;
run;

proc print data=work.BRFSS23 (obs=20);
run;

/* all states asked our exposure and outcome questions */
/* exclude the ACEDEPRS variable because most states did not ask this question */
proc means data=work.BRFSS23 nmiss;
    class _state;
    var SEXVAR GENHLTH _HLTHPL1 CVDINFR4 MARITAL DIABETE4 EDUCA EMPLOY1 INCOME3
        _BMI5CAT DIFFWALK DRNK3GE5 PREDIAB2 _IMPRACE _MENT14D _PHYS14D;
RUN;

/* diabetes "1" there */
proc freq data=work.BRFSS23;
    tables DIABETE4;
run;

proc contents data=work.BRFSS23;
run;

/* no missing obs for exposure or outcome variable */
data work.BRFSS23;
    set work.BRFSS23;
    if nmiss(of ADDEPEV3 EXERANY2) = 0;
run;

proc print data=work.BRFSS23 (obs=20);
    var addepev3 exerany2;
run;

proc contents data=work.BRFSS23;
run;

proc freq data=work.BRFSS23;
    tables DIABETE4*PREDIAB2;
run;

/* those with diabetes (DIABETE4=1) were not asked the prediabetes question,
   so they show up as . (missing). Recode missing PREDIAB2 to 4 (Not Applicable). */
data work.BRFSS23;
    set work.BRFSS23;
    if DIABETE4 = 1 and missing(PREDIAB2) then PREDIAB2 = 4;
run;

/* get rid of all missing / refused / no response */
/* NOTE(review): DRNK3GE5 is presumably missing for non-drinkers, so this
   filter may drop them as well -- confirm that is intended. */
data work.BRFSS23;
    set work.BRFSS23;
    if      GENHLTH  not in (7, 9, .)
        and _HLTHPL1 not in (7, 9, .)
        and EXERANY2 not in (7, 9, .)
        and ADDEPEV3 not in (7, 9, .)
        and CVDINFR4 not in (7, 9, .)
        and MARITAL  not in (9, .)
        and DIABETE4 not in (7, 9, .)
        and EDUCA    not in (9, .)
        and EMPLOY1  not in (9, .)
        and INCOME3  not in (9, ., 77, 99)
        and _BMI5CAT not in (.)
        and DIFFWALK not in (7, 9, .)
        and DRNK3GE5 not in (77, 99, .)
        and _MENT14D not in (9)
        and (DIABETE4 = 1 or PREDIAB2 not in (., 7, 9))
        and _PHYS14D not in (9);
run;

/* checking to make sure no missing observations / wildcodes */
proc freq data=work.BRFSS23;
    tables GENHLTH _HLTHPL1 EXERANY2 ADDEPEV3 CVDINFR4 MARITAL DIABETE4 EDUCA
           EMPLOY1 INCOME3 DIFFWALK DRNK3GE5 PREDIAB2 _MENT14D _PHYS14D _BMI5CAT
           / missing;
run;

proc contents data=work.BRFSS23 varnum;
run;

/* creating formats: every format used anywhere below is defined here, BEFORE
   any FORMAT statement uses it. Previously Region_2f. was applied before it
   was defined, which raises a format-not-found error. */
proc format;
    /* 29 (Missouri) was missing although the region logic uses it.
       78 (Virgin Islands) added -- verify against the 2023 codebook. */
    value _STATEf
         1= 'Alabama'         2= 'Alaska'          4= 'Arizona'
         5= 'Arkansas'        6= 'California'      8= 'Colorado'
         9= 'Connecticut'    10= 'Delaware'       11= 'District of Columbia'
        12= 'Florida'        13= 'Georgia'        15= 'Hawaii'
        16= 'Idaho'          17= 'Illinois'       18= 'Indiana'
        19= 'Iowa'           20= 'Kansas'         21= 'Kentucky'
        22= 'Louisiana'      23= 'Maine'          24= 'Maryland'
        25= 'Massachusetts'  26= 'Michigan'       27= 'Minnesota'
        28= 'Mississippi'    29= 'Missouri'       30= 'Montana'
        31= 'Nebraska'       32= 'Nevada'         33= 'New Hampshire'
        34= 'New Jersey'     35= 'New Mexico'     36= 'New York'
        37= 'North Carolina' 38= 'North Dakota'   39= 'Ohio'
        40= 'Oklahoma'       41= 'Oregon'         42= 'Pennsylvania'
        44= 'Rhode Island'   45= 'South Carolina' 46= 'South Dakota'
        47= 'Tennessee'      48= 'Texas'          49= 'Utah'
        50= 'Vermont'        51= 'Virginia'       53= 'Washington'
        54= 'West Virginia'  55= 'Wisconsin'      56= 'Wyoming'
        66= 'Guam'           72= 'Puerto Rico'    78= 'Virgin Islands';

    value SEXVARf   1= 'Male' 2= 'Female';
    value GENHLTHf  1= 'Excellent' 2='Very Good' 3='Good' 4='Fair' 5='Poor';
    value _HLTHPL1f 1= 'Yes' 2= 'No';
    value EXERANY2f 1= 'Yes' 2='No';
    value ADDEPEV3f 1= 'Yes' 2='No';
    value CVDINFR4f 1= 'Yes' 2='No';

    value MARITALf
        1= 'Married'
        2= 'Divorced'
        3= 'Widowed'
        4= 'Separated'
        5= 'Never Married'
        6= 'A member of an unmarried couple';

    value DIABETE4f
        1= 'Yes'
        2= 'Yes, but female told only during pregnancy'
        3= 'No'
        4= 'No, but prediabetes or borderline diabetes';

    /* Fixed: codes 4-8 had been pasted from EMPLOY1f. EDUCA has six valid
       levels (9 = refused is filtered out above). */
    value EDUCAf
        1= 'Never attended school or only kindergarten'
        2= 'Elementary'
        3= 'Some High School'
        4= 'High School Graduate'
        5= 'Some college or technical school'
        6= 'College graduate';

    value EMPLOY1f
        1= 'Employed'
        2= 'Self-employed'
        3= 'Out of work for 1 year or more'
        4= 'Out of work for less than 1 year'
        5= 'A homemaker'
        6= 'A student'
        7= 'Retired'
        8= 'Unable to work';

    /* Fixed: bracket 9 ends at $150,000 (was $140,000), which left a gap
       before bracket 10. */
    value INCOME3f
         1= 'Less than $10,000'
         2= '$10,000 to less than $15,000'
         3= '$15,000 to less than $20,000'
         4= '$20,000 to less than $25,000'
         5= '$25,000 to less than $35,000'
         6= '$35,000 to less than $50,000'
         7= '$50,000 to less than $75,000'
         8= '$75,000 to less than $100,000'
         9= '$100,000 to less than $150,000'
        10= '$150,000 to less than $200,000'
        11= '$200,000 or more';

    value DIFFWALKf 1= 'Yes' 2='No';
    value PREDIAB2f 1= 'Yes' 2= 'Yes, during pregnancy' 3= 'No' 4='Not Applicable';

    value _IMPRACEf
        1= 'White Non-Hispanic'
        2= 'Black Non-Hispanic'
        3= 'Asian Non-Hispanic'
        4= 'American Indian/Alaskan Native Non-Hispanic'
        5= 'Hispanic'
        6= 'Other race Non-Hispanic';

    value _MENT14Df
        1= 'Zero days when mental health not good'
        2= '1-13 days when mental health not good'
        3= '14+ days when mental health not good';

    value _PHYS14Df
        1= 'Zero days when physical health not good'
        2= '1-13 days when physical health not good'
        3= '14+ days when physical health not good';

    value _BMI5CATf 1= 'Underweight' 2='Normal Weight' 3='Overweight' 4='Obese';

    /* formats for the derived variables */
    value MARITAL_newf  1= "Married" 2= "Not Married";
    value INCOME3_newf  1= "Low" 2= "Middle" 3= "High";
    value _IMPRACE_newf 1= "White Non-Hispanic" 2= "Other";
    value Region_2f
        1= "Northeast" 2= "Midwest" 3= "South" 4= "West"
        5= "Guam" 6= "Puerto Rico";

    /* 14 is not filtered out above, so it is labelled rather than left as a
       bare code -- verify the wording against the codebook. */
    value _AGEG5YRf
         1 = '18-24'  2 = '25-29'  3 = '30-34'  4 = '35-39'  5 = '40-44'
         6 = '45-49'  7 = '50-54'  8 = '55-59'  9 = '60-64' 10 = '65-69'
        11 = '70-74' 12 = '75-79' 13 = '80+'
        14 = "Don't know/Refused/Missing";
run;

/* creating Region_2 variable as a numeric variable.
   This now runs BEFORE Region_2 is labelled and formatted; previously the
   LABEL/FORMAT step came first and created Region_2 as an all-missing
   variable.
   NOTE(review): any _STATE code not listed (for example 78) leaves Region_2
   missing -- decide whether such records need their own region. */
data work.BRFSS23;
    set work.BRFSS23;
    if      _STATE in (9, 23, 25, 33, 44, 50, 34, 36, 42) then Region_2 = 1;
    else if _STATE in (17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46) then Region_2 = 2;
    else if _STATE in (10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48) then Region_2 = 3;
    else if _STATE in (4, 8, 16, 30, 32, 35, 49, 56, 2, 6, 15, 41, 53) then Region_2 = 4;
    else if _STATE = 66 then Region_2 = 5;
    else if _STATE = 72 then Region_2 = 6;
RUN;

/* applying formats and labels to the dataset */
data work.BRFSS23;
    set work.BRFSS23;
    label
        _STATE   = "State"
        SEXVAR   = "Sex"
        GENHLTH  = "General Health"
        _HLTHPL1 = "Have Any Health Coverage"
        EXERANY2 = "Exercise in Past 30 Days"
        ADDEPEV3 = "(Ever told) you had a depressive disorder"
        CVDINFR4 = "Ever Diagnosed with Heart Attack"
        MARITAL  = "Marital Status"
        DIABETE4 = "(Ever told) you have Diabetes"
        EDUCA    = "Education Level"
        EMPLOY1  = "Employment Status"
        INCOME3  = "Income Level"
        DIFFWALK = "Difficulty Walking or Climbing Stairs"
        PREDIAB2 = "Ever been told by a doctor or other health professional that you have pre-diabetes or borderline diabetes?"
        _IMPRACE = "Imputed Race/Ethnicity value"
        _MENT14D = "Computed Mental Health Status"
        _PHYS14D = "Computed Physical Health Status"
        _BMI5CAT = "Computed Body Mass Index Categories"
        Region_2 = "Region";
    format
        _STATE   _STATEf.
        SEXVAR   SEXVARf.
        GENHLTH  GENHLTHf.
        _HLTHPL1 _HLTHPL1f.
        EXERANY2 EXERANY2f.
        ADDEPEV3 ADDEPEV3f.
        CVDINFR4 CVDINFR4f.
        MARITAL  MARITALf.
        DIABETE4 DIABETE4f.
        EDUCA    EDUCAf.
        EMPLOY1  EMPLOY1f.
        INCOME3  INCOME3f.
        DIFFWALK DIFFWALKf.
        PREDIAB2 PREDIAB2f.
        _IMPRACE _IMPRACEf.
        _MENT14D _MENT14Df.
        _PHYS14D _PHYS14Df.
        _BMI5CAT _BMI5CATf.
        Region_2 Region_2f.;
run;

proc contents data=work.BRFSS23;
run;

/* check to make sure no missing (Region_2 is derived above, so it should be
   populated for every listed state) */
proc means data=work.BRFSS23 n nmiss;
run;

/* "Yes" row now showing up for DIABETE4 */
proc freq data=work.BRFSS23;
    tables GENHLTH _HLTHPL1 EXERANY2 ADDEPEV3 CVDINFR4 MARITAL DIABETE4 EDUCA
           EMPLOY1 INCOME3 _BMI5CAT DIFFWALK PREDIAB2 _MENT14D _PHYS14D
           / missing;
run;

/* creating new variables for race, marital status, and income */
data work.BRFSS23;
    set work.BRFSS23;

    if MARITAL = 1 then MARITAL_new = 1;
    else if MARITAL in (2, 3, 4, 5, 6) then MARITAL_new = 2;

    if INCOME3 in (1, 2, 3) then INCOME3_new = 1;
    else if INCOME3 in (4, 5, 6, 7) then INCOME3_new = 2;
    else if INCOME3 in (8, 9, 10, 11) then INCOME3_new = 3;

    if _IMPRACE = 1 then _IMPRACE_new = 1;
    else _IMPRACE_new = 2;
run;

proc freq data=work.BRFSS23;
    tables GENHLTH _HLTHPL1 EXERANY2 ADDEPEV3 CVDINFR4 MARITAL_new DIABETE4 EDUCA
           EMPLOY1 INCOME3_new _BMI5CAT DIFFWALK DRNK3GE5 PREDIAB2 _MENT14D
           _PHYS14D _IMPRACE_new / missing;
run;

/* applying formats to the derived variables and setting the _AGEG5YR format
   and label. The formats on the original variables are already stored on the
   data set, so they are not repeated here. */
data work.BRFSS23;
    set work.BRFSS23;
    format
        MARITAL_new  MARITAL_newf.
        INCOME3_new  INCOME3_newf.
        _IMPRACE_new _IMPRACE_newf.
        _AGEG5YR     _AGEG5YRf.;
    label _AGEG5YR = 'Age Group (5-Year Intervals)';
run;

/* HERE is WHERE THE ERROR STARTED (poster's note).
   The original wrote the output into the shared-file-links folder:
     /home/u61481478/my_shared_file_links/u59974645/PROJECT_DATA/
   Folders under my_shared_file_links are normally read-only for the person
   they are shared with, so a DATA step cannot create a data set there.
   The output now goes to a writable directory.
   NOTE(review): /home/u61481478 is assumed to be the poster's own home
   directory -- point outdir at any folder you can write to. */
%let outdir = /home/u61481478;

libname dataset "&outdir";

/* The user-defined formats live in WORK.FORMATS. A later session that reads
   dataset.BRFSS23f must re-run the PROC FORMAT step above (or set
   OPTIONS NOFMTERR) before using the data set. */
DATA dataset.BRFSS23f;
    SET work.brfss23;
RUN;
... View more