BookmarkSubscribeRSS Feed
tc
Lapis Lazuli | Level 10 tc
Lapis Lazuli | Level 10

HR.png

 

Drawing inspiration from Curtis Harris' fine article "The History of the Single Season Home Run Record," here's code for a quick box-plot take on the same subject.

 

UPDATE: WaPo's "Baseball just saw its biggest home run surge since the steroids era. Here’s why." has a nice explanation of some of the reasons behind the rise and fall of the long ball.

 

/* Box plot distribution of the top 50 home run leaders, 1910-2015.
   Data sourced from Lahman's Baseball Database (seanlahman.com/baseball-archive/statistics). */
  
/* Read the batting file into HOMERS.
   Every column up to HR has to be listed on the INPUT statement because the
   file is comma-delimited and read positionally; only the player, season,
   team and home-run columns are stored in the output dataset. */
data homers(keep=playerID yearID teamID HR);
  infile '/folders/myfolders/BaseBall/Batting.csv'
         firstobs=2          /* line 1 is skipped (read starts at line 2) */
         dsd delimiter=','
         truncover;
  input playerID : $10.
        yearID
        stint
        teamID   : $3.
        lgID     : $2.
        G
        AB
        R
        H
        D
        T
        HR;
run;

/* Read the player master file into PLAYERS.
   The birth and death columns are read only to step past them in the
   delimited record; the stored dataset holds just the ID and the two name
   fields, with character compression turned on. */
data players(keep=playerID namefirst namelast compress=char);
  infile '/folders/myfolders/BaseBall/Master.csv'
         firstobs=2          /* line 1 is skipped (read starts at line 2) */
         dsd delimiter=','
         truncover;
  input playerID     : $10.
        birthYear
        birthMonth
        birthDay
        birthCountry : $10.
        birthState   : $10.
        birthCity    : $30.
        deathYear
        deathMonth
        deathDay
        deathCountry : $10.
        deathState   : $10.
        deathCity    : $30.
        nameFirst    : $30.
        nameLast     : $30.;
run;

/* Rank players by home runs within each season, writing HRRANK.

   Two fixes over ranking HOMERS directly:
     1. HOMERS is read with a STINT column, i.e. it can hold several rows for
        one player in one season. Those rows are summed first so a player is
        ranked on his full-season total rather than on each partial line.
     2. PROC RANK with a BY statement needs its input ordered by the BY
        variable. The ORDER BY below guarantees that instead of relying on
        the row order of the source file.

   Output columns: yearID, playerID, HR (season total) and HRrank.
   teamID is not carried forward because a season total may span teams. */
proc sql;
  create table HRseason as
    select yearID,
           playerID,
           sum(HR) as HR
      from homers
     group by yearID, playerID
     order by yearID, playerID;
quit;

/* DESCENDING makes the largest HR total rank 1; TIES=LOW gives tied players
   the lowest rank of the tied group. */
proc rank data=HRseason out=HRrank ties=low descending;
  by yearID;
  var HR;
  ranks HRrank;
run;

/* Build HRLEADERS: join the ranked seasons to player names and keep rows
   ranked 1 through 50 within their year.
   HR60 is a text label of the form "   Name (HR,YEAR)" for rows with 60 or
   more home runs; the CASE has no ELSE, so it is missing for all other rows. */
proc sql;
  create table HRleaders as
    select r.yearid,
           r.hr,
           case
             when r.hr >= 60
               then '   '||trim(p.namelast)||" ("||put(r.hr,z2.)||','||put(r.yearid,z4.)||")"
           end as HR60
      from HRrank as r
           inner join
           players as p
             on r.playerid = p.playerid
     where r.HRrank between 1 and 50;
quit;

/* Draw one vertical box plot per season from HRLEADERS (1910 onward) and
   write it as image "HR" under the GPATH directory.
   RESET is kept as the first ODS GRAPHICS option so the settings that
   follow it are applied on top of the defaults. */
ods listing gpath='/folders/myfolders' image_dpi=300;
ods graphics on / reset
                  antialias
                  antialiasmax=6000
                  width=14in
                  height=11in
                  imagename="HR";

/* TITLE and FOOTNOTE are global statements, so they are set before the PROC. */
title "Top 50 Home Run Leaders 1910-2015";
footnote height=7pt "Note: League followed 154 game schedule until 1961, and 162 games since. Number of games varied widely from 1901-1919, and strikes shortened '72, '81, '94, '95 seasons.";

proc sgplot data=hrleaders noautolegend;
  where yearid >= 1910;

  /* Distribution of HR for each season */
  vbox hr / category=yearid boxwidth=.35;

  /* Overlay the HR60 label next to its point; rows with a missing HR60
     contribute no text */
  text x=yearid y=HR text=HR60 / position=right;

  xaxis type=linear
        values=(1910 to 2015 by 5)
        offsetmax=.005
        display=(nolabel)
        valueattrs=(size=7pt);
  yaxis grid
        display=(nolabel)
        valueattrs=(size=7pt);
run;

 

 

1 REPLY 1
ballardw
Super User

For added fun: Normalize this to the number of games played.

sas-innovate-2024.png

Join us for SAS Innovate April 16-19 at the Aria in Las Vegas. Bring the team and save big with our group pricing for a limited time only.

Pre-conference courses and tutorials are filling up fast and are always a sellout. Register today to reserve your seat.

 

Register now!

How to Concatenate Values

Learn how to use the CAT functions in SAS to join values from multiple variables into a single value.

Find more tutorials on the SAS Users YouTube channel.

Click image to register for webinarClick image to register for webinar

Classroom Training Available!

Select SAS Training centers are offering in-person courses. View upcoming courses for:

View all other training opportunities.

Discussion stats
  • 1 reply
  • 1019 views
  • 1 like
  • 2 in conversation