BookmarkSubscribeRSS Feed
cranberry
Calcite | Level 5

Hi, 

 

I have been working on the Essentials 1 Case Study looking at TSA Claims data. I have reviewed all the documents and am still having troubles with my code. 

 

For the total number of date issues, I got 3938 but the document says it should be 4241.

 

For the part with the macro for StateName, every time I run it there is no results/output and the log says:

 
NOTE: No observations were selected from data set TSA.CLAIMS_CLEANED.
NOTE: There were 0 observations read from the data set TSA.CLAIMS_CLEANED.
WHERE 0 /* an obviously FALSE WHERE clause */ ;
 
I even copied and pasted all of the code from the document and still receive the same number (3938) and same log message from my original code. When the macro is for "C" then there are results that show up, but none for the example with "California" (as per the document code). I noticed in the proc contents that the length of State and StateName was only 1, perhaps that's the problem? I tried changing the length of those in the data steps, but that didn't change anything. 
 
Can someone please help me make this work? Below is my code after I set up my library:
options validvarname=v7;
proc import datafile="&path/TSAClaims2002_2017.csv"
	dbms=csv out=ClaimsImport replace;
	guessingrows=max;
run;

/* EXPLORE DATA */

/* Look at first 20 observations */
proc print data=tsa.claimsimport (obs=20);
run;
/* Look at the descriptor portion of the data, sorted bycolumn order. */
proc contents data=tsa.claimsimport varnum;
run;

proc freq data=tsa.claimsimport;
	tables Claim_Site Disposition Claim_Type / nocum nopercent;
	tables Incident_Date Date_Received / nocum nopercent;
	format Incident_Date Date_Received year4.;
run;

/* PREPARE DATA */ 

/* Remove entirely duplicate rows and create new table */
proc sort data=tsa.claimsimport 
	out=tsa.Claims_NoDups 
		nodupkey;
	by _all_;
run;

proc sort data=tsa.claims_nodups;
	by Incident_Date;
run;


data tsa.Claims_Cleaned;
	set tsa.claims_nodups;
/* Clean Claim_Site by replacing missing and - values with Unknown */
	if Claim_Site in ('-',"") then Claim_Site = "Unknown"; 
 /*Clean Disposition */
	if Disposition in ("-", "") then 
		Disposition = 'Unknown';
	else if Disposition = 'Closed: Canceled' then 
		Disposition = 'Closed:Canceled';
	else if Disposition = 'losed: Contractor Claim' then 
		Disposition ='Closed:Contractor Claim';
/*Clean Claim_Type*/
	if Claim_Type in ("-","") then Claim_Type = "Unknown"; 
	else if Claim_Type = 'Passenger Property Loss/Injury' then Claim_Type = 'Passenger Property Loss';
	else if Claim_Type = 'Passenger Property Loss/Injur' then Claim_Type = 'Passenger Property Loss';
	else if Claim_Type = 'Property Damage/Personal Injury' then Claim_Type = 'Property Damage';
/* Convert State values to uppercase and all StateName Columns to proper case */
	State=upcase(state);
	StateName=propcase(StateName);
/* Create columns that identifies rows with issues	 */
	if (Incident_Date > Date_Received or
		Incident_Date = . or 
		Date_Received = . or
		year(Incident_Date) < 2002 or
		year(Incident_Date) > 2017 or
		year(Date_Received) < 2002 or
		year(Date_Received) > 2017)
		then Date_Issues = "Needs Review";
/* Create permanent labels for columns	 */
format Incident_Date Date_Received date9. Close_Amount dollar20.2;
label Airport_Code = "Airport Code"
	Airport_Name = "Airport Name"
	Claim_Number = "Claim Number"
	Claim_Site = "Claim Site"
	Claim_Type = "Claim Type"
	Close_Amount = "Close Amount"
	Date_Issues = "Date Issues"
	Date_Received = "Date Received"
	Incident_Date = "Incident Date"
	Item_Category = "Item Category";
/* Drop County and City Columns */
drop County City;
run;


/* ANALYZE */ 


/* How many date issues are in the overall data? 3938  */
title "Overall Date Issues in the Data";
proc freq data=tsa.Claims_Cleaned;
	table Date_Issues / nocum nopercent;
run;
title; 

/* How many claims per year of Incident_date are there in the overall data? 
	Values vary between years. There were 2122 in 2002 and 8340 in 2017 */
/* Include a plot */
ods graphics on; 
title "Overall Claims by Year";
proc freq data=tsa.Claims_Cleaned;
	table Incident_Date / nocum nopercent plots=freqplot;
	format Incident_Date year4.;
 	where Date_Issues is missing; /* filter where rows DO NOT have any date issues */
run; 

/* What are the frequency values for Claim_Type, Claim_Site, and Disposition for  */
/* 	a selected state? NOTE: User should be able to dynamically input a specific state */

%let StateName=California;

proc freq data=tsa.Claims_Cleaned order=freq;
	tables Claim_Type Claim_Site Disposition / nocum nopercent;
 	where StateName="&StateName" and Date_Issues is null; /* Filter for only rows where colunm has missing value */
run; 

proc means data=tsa.Claims_Cleaned mean min max sum maxdec=0;
	var Close_Amount;
	where StateName="&StateName" and Date_Issues is null;
run;

Ready to join fellow brilliant minds for the SAS Hackathon?

Build your skills. Make connections. Enjoy creative freedom. Maybe change the world. Registration is now open through August 30th. Visit the SAS Hackathon homepage.

Register today!
LIBNAME 101

Follow along as SAS technical trainer Dominique Weatherspoon expertly answers all your questions about SAS Libraries.

Find more tutorials on the SAS Users YouTube channel.

Discussion stats
  • 0 replies
  • 194 views
  • 0 likes
  • 1 in conversation