/*---------------------------------------------------------------------
  Step 1: Clean the data

  Reads quakes.earthquakes_dirty and derives the analysis columns:
    ID, REGION_CODE : first and second word of ID_REGIONCODE
                      (split on SCAN's default delimiters)
    FLAG_TSUNAMI    : upper-cased
    DATE_TIME       : SAS datetime built from the separate
                      month/day/year/hour/minute/seconds columns
    EQ_PRIMARY      : first non-missing magnitude, searched in the
                      order MW, MS, MB, ML, MFA, UNK
  Only the columns named on the KEEP statement are written to
  work.earthquakes_clean.
---------------------------------------------------------------------*/
data earthquakes_clean;
    set quakes.earthquakes_dirty;

    ID           = scan(ID_REGIONCODE, 1);
    REGION_CODE  = scan(ID_REGIONCODE, 2);
    FLAG_TSUNAMI = upcase(FLAG_TSUNAMI);

    /* MDY builds the date part, DHMS adds the time of day. */
    date_time = dhms(mdy(month, day, year), hour, minute, seconds);

    /* COALESCE returns the first non-missing argument, so the argument
       order is the priority order of the magnitude scales. */
    EQ_PRIMARY = coalesce(EQ_MAG_MW, EQ_MAG_MS, EQ_MAG_MB,
                          EQ_MAG_ML, EQ_MAG_MFA, EQ_MAG_UNK);

    format EQ_PRIMARY 3.1;
    format date_time datetime.;

    keep ID Region_Code Flag_Tsunami Date_Time EQ_Primary
         Focal_Depth Country Location_Name;
run;

/*---------------------------------------------------------------------
  Step 2: Create a valid and an invalid data set

  Every observation of earthquakes_clean is checked against the rules
  below. INVALID_DESCRIPTION collects the names of all variables that
  failed, separated by blanks. Observations with at least one failure
  go to work.invalid (description kept); all others go to
  work.earthquakes_valid (description dropped).
---------------------------------------------------------------------*/

/* BY-group processing in the next step needs the data ordered by ID. */
proc sort data=earthquakes_clean;
    by id;
run;

data earthquakes_valid(drop=invalid_description) invalid;
    set earthquakes_clean;
    by id;

    length invalid_description $ 100;
    invalid_description = "";

    /* Duplicate ID: the first observation of each ID group is kept as
       the valid one; every later observation with the same ID is a
       duplicate. Testing NOT FIRST.ID (rather than only the last row
       of the group) also catches the middle rows when an ID occurs
       three or more times. */
    if not first.id then
        invalid_description = "id";

    if region_code not in ("10",  "15",  "20",  "30",  "40",  "50",
                           "60",  "70",  "80",  "90",  "100", "110",
                           "120", "130", "140", "150", "160", "170") then
        invalid_description = catx(" ", invalid_description, "region_code");

    /* Only blank or "TSU" is accepted (value was upper-cased in step 1). */
    if flag_tsunami not in ("", "TSU") then
        invalid_description = catx(" ", invalid_description, "flag_tsunami");

    /* A missing datetime means the date/time parts could not be combined. */
    if date_time = . then
        invalid_description = catx(" ", invalid_description, "date_time");

    /* NOTE: a SAS missing value compares lower than any number, so a
       missing EQ_PRIMARY or FOCAL_DEPTH is also flagged by the two
       range checks below. */
    if eq_primary < 0.0 or eq_primary > 9.9 then
        invalid_description = catx(" ", invalid_description, "eq_primary");

    if focal_depth < 0 or focal_depth > 700 then
        invalid_description = catx(" ", invalid_description, "focal_depth");

    if invalid_description ne "" then output invalid;
    else output earthquakes_valid;
run;
/* ... View more */