# Science of The Super Bowl

February 4, 2017
By

(This article was first published on R – Jesse Piburn, and kindly contributed to R-bloggers)

A couple days ago, I participated in a Science of the Super Bowl Panel discussion organized by Newswise. I was asked to give a 5 (which turned more into about 10) minute overview, so I focused on answering 3 questions.

• What is data science?
• How is data science used in the NFL?
• How might data science affect the outcome of the Super Bowl?

For my talk  (around 21:45 mark) I made some visuals, so I thought I would recreate one here and include the R code. Here is the finished product, below is the code to reproduce it. Here is a higher res copy, that is actually readable. ```######################################################
# RB direction charts
# 2/1/2017
#
#
######################################################

library(nflscrapR)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(gtable)
library(grid)
library(gridExtra)

# download all paly by play data for 2016 including playoffs -----
# this is going to take a while
reg_games <- extracting_gameids(2016, playoffs = FALSE)
post_games <- extracting_gameids(2016, playoffs = TRUE)
all_games <- c(reg_games, post_games)

game_list <- lapply(all_games, game_play_by_play)

s16 <- bind_rows(game_list)

# calculate run percentages -----
rundf <- s16 %>% filter(PlayType %in% c("Run") &
!is.na(down) &
posteam %in% c("NE", "ATL")) %>%
mutate(depth = ifelse(Yards.Gained <= 0, "Negative", "Short"),
depth = ifelse(Yards.Gained >= 5, "Middle", depth),
depth = ifelse(Yards.Gained >= 10, "Deep", depth)) %>%
group_by(posteam, Rusher) %>%
mutate(down_n = n()) %>%
group_by(posteam, Rusher, RunLocation, RunGap, depth) %>%
summarise(play_per = (n() / mean(down_n)) * 100,
loc_att = n(),
total_att = mean(down_n))

# just a little clean up -----
rundf[is.na(rundf\$RunLocation), "RunLocation"] <- "middle"
rundf[rundf\$RunLocation == "middle", "RunGap"] <- ""

rundf\$runplace <- stringr::str_trim(paste(rundf\$RunLocation, rundf\$RunGap))

rundf\$runplace <- factor(rundf\$runplace, c("left end", "left tackle",
"left guard", "middle",
"right guard", "right tackle",
"right end"))

rundf\$depth <- factor(rundf\$depth, c("Negative", "Short", "Middle", "Deep"))

# not all RBs will have runs to all locations, so make a full data frame -----
# and join to it before charting
fulldf <- expand.grid("runplace" =c("left end", "left tackle", "left guard",
"middle", "right guard", "right tackle",
"right end"),
"depth" = c("Negative", "Short", "Middle", "Deep"))

# Devonta Freeman -----
freemandf <- rundf %>% filter(Rusher == "D.Freeman") %>%
right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

freeman_plot <- ggplot(freemandf, aes(x = runplace, y = depth)) +
geom_tile(aes(fill = play_per),colour = "white") +
scale_fill_gradient(high = "#ca0020", low = "#0571b0",
limits = c(0,15), guide_colourbar(title = "%")) +
theme_fivethirtyeight() +
labs(title = "Devonta Freeman")

# Dion Lewis -----
lewisdf <- rundf %>% filter(Rusher == "D.Lewis") %>%
right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

lewis_plot <-  ggplot(lewisdf, aes(x = runplace, y = depth)) +
geom_tile(aes(fill = play_per),colour = "white") +
scale_fill_gradient(high = "#ca0020", low = "#0571b0",
limits = c(0,30), guide_colourbar(title = "%")) +
theme_fivethirtyeight() +
labs(title = "Dion Lewis")

# blount -----
blountdf <- rundf %>% filter(Rusher == "L.Blount") %>%
right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

blount_plot<-  ggplot(blountdf, aes(x = runplace, y = depth)) +
geom_tile(aes(fill = play_per),colour = "white") +
scale_fill_gradient(high = "#ca0020", low = "#0571b0",
limits = c(0,15), guide_colourbar(title = "%")) +
theme_fivethirtyeight() +
labs(title = "LeGarrette Blount")

# colemen -----
colemandf <- rundf %>% filter(Rusher == "T.Coleman") %>%
right_join(fulldf) %>% mutate(play_per = ifelse(is.na(play_per), 0, play_per))

coleman_plot <- ggplot(colemandf, aes(x = runplace, y = depth)) +
geom_tile(aes(fill = play_per),colour = "white") +
scale_fill_gradient(high = "#ca0020", low = "#0571b0",
limits = c(0,15), guide_colourbar(title = "%")) +
theme_fivethirtyeight() +
labs(title = "Tevin Coleman")

# combine plots and add text -----

p1 <- arrangeGrob(freeman_plot, coleman_plot,
blount_plot, lewis_plot)

# sub title ----
titleback <- rectGrob(gp=gpar(fill = "#F0F0F0", col = NA))
titlesub <- textGrob("2016 Run Tendency for Super Bowl Running Backs",
gp = gpar(fontsize = 12, fontface = "bold",
fontfamily = "sans", col = "#3C3C3C"),
just = "left", x = unit(0.01, "npc"))

p1 <- gtable_add_rows(p1, heights = grobHeight(titlesub) + padding, pos = 0)
p1 <- gtable_add_grob(p1, grobTree(titleback, titlesub), 1, 1, 1, ncol(p1))

# title ----
title <- textGrob("Which Way Did he Go?",
gp = gpar(fontsize = 18, fontface = "bold",
fontfamily = "sans", col = "#3C3C3C"),
just = "left", x = unit(0.01, "npc"))

p1 <- gtable_add_rows(p1, heights = grobHeight(title) + padding, pos = 0)
p1 <- gtable_add_grob(p1, grobTree(titleback, title), 1, 1, 1, ncol(p1))

# bottom ----
subtext <- textGrob("Source: @JesseOPiburn",
gp = gpar(fontsize = 10, fontface = "bold.italic",
fontfamily = "sans", col = "#3C3C3C"),
just = "left", x = unit(0.01, "npc"))

p1 <- gtable_add_rows(p1, heights = grobHeight(subtext) + padding, pos = -1)
p1 <- gtable_add_grob(p1, grobTree(titleback,subtext), t = nrow(p1), l = 1,
b = nrow(p1), r = 2)
grid.draw(p1)

ggsave(plot = p1, filename = "plots/rb direction.png",
width = 18, height = 10, units = "in", dpi = 600)```

To leave a comment for the author, please follow the link and comment on their blog: R – Jesse Piburn.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...

If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...