State Space AFL

[This article was first published on Analysis of AFL, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Ever read a post and went damn! I really wonder how that would work for AFL .

Well that was me a couple of weeks ago reading this fantastic post and as most people know a good post is a fantastic post when it is reproducible.

library(tidyverse)

## -- Attaching packages --------------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.8.0     v stringr 1.3.0
## v readr   1.1.1     v forcats 0.3.0

## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(rjags)

## Loading required package: coda

## Linked to JAGS 4.3.0

## Loaded modules: basemod,bugs

library(gsheet)
library(stringr)
library(knitr)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

logit <- function(p) { 
  out <- log(p/(1 - p))
  return(out)
}

url<-"https://docs.google.com/spreadsheets/d/1U95IzGYJGOzQgLZVmUk6PPsGPLJoyJ2-Ry2dEY3sU3A/edit?usp=sharing"
afl_bookies<-read.csv(text=gsheet2text(url, format='csv'), stringsAsFactors=FALSE)

## No encoding supplied: defaulting to UTF-8.

names(afl_bookies)

##  [1] "date"                    "Kick.Off..local."       
##  [3] "Home.Team"               "Away.Team"              
##  [5] "Venue"                   "Home.Score"             
##  [7] "Away.Score"              "Play.Off.Game."         
##  [9] "Home.Goals"              "Home.Behinds"           
## [11] "Away.Goals"              "Away.Behinds"           
## [13] "Home.Odds"               "Away.Odds"              
## [15] "Bookmakers.Surveyed"     "Home.Odds.Open"         
## [17] "Home.Odds.Min"           "Home.Odds.Max"          
## [19] "Home.Odds.Close"         "Away.Odds.Open"         
## [21] "Away.Odds.Min"           "Away.Odds.Max"          
## [23] "Away.Odds.Close"         "Home.Line.Open"         
## [25] "Home.Line.Min"           "Home.Line.Max"          
## [27] "Home.Line.Close"         "Away.Line.Open"         
## [29] "Away.Line.Min"           "Away.Line.Max"          
## [31] "Away.Line.Close"         "Home.Line.Odds.Open"    
## [33] "Home.Line.Odds.Min"      "Home.Line.Odds.Max"     
## [35] "Home.Line.Odds.Close"    "Away.Line.Odds.Open"    
## [37] "Away.Line.Odds.Min"      "Away.Line.Odds.Max"     
## [39] "Away.Line.Odds.Close"    "Total.Score.Open"       
## [41] "Total.Score.Min"         "Total.Score.Max"        
## [43] "Total.Score.Close"       "Total.Score.Over.Open"  
## [45] "Total.Score.Over.Min"    "Total.Score.Over.Max"   
## [47] "Total.Score.Over.Close"  "Total.Score.Under.Open" 
## [49] "Total.Score.Under.Min"   "Total.Score.Under.Max"  
## [51] "Total.Score.Under.Close"

colnames(afl_bookies)[1] <- "date"
afl_bookies$date<-dmy(afl_bookies$date)

afl_bookies$overround<-(1/afl_bookies$Home.Odds.Close) + (1/afl_bookies$Away.Odds.Close)
summary(afl_bookies$overround)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.012   1.024   1.026   1.027   1.028   1.063     681

plot(afl_bookies$overround)

plot(afl_bookies$overround-1)

qplot(afl_bookies$overround,geom="histogram", bins=100)

## Warning: Removed 681 rows containing non-finite values (stat_bin).

afl_bookies$overround<-afl_bookies$overround-1


afl_bookies$true.home.prob<-1/((afl_bookies$Home.Odds.Close*afl_bookies$overround)+afl_bookies$Home.Odds.Close)
afl_bookies$true.away.prob<-1/((afl_bookies$Away.Odds.Close*afl_bookies$overround)+afl_bookies$Away.Odds.Close)

qplot((afl_bookies$true.home.prob+afl_bookies$true.away.prob),geom="histogram")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 681 rows containing non-finite values (stat_bin).

min.day <- min(afl_bookies$date)
afl_bookies <- afl_bookies %>%
  mutate(day = date - min.day, week = as.numeric(floor(day/7) + 1))

tab.out <- head(afl_bookies, 4) %>% select(date, Home.Team, Away.Team, true.home.prob)
kable(tab.out)

date Home.Team Away.Team true.home.prob
2018-04-22 Brisbane Gold Coast 0.5454545
2018-04-22 North Melbourne Hawthorn 0.3811881
2018-04-21 Fremantle Western Bulldogs 0.6030151
2018-04-21 Port Adelaide Geelong 0.6474820

y <- logit(afl_bookies$true.home.prob)
w <- afl_bookies$week
w

##    [1] 462 462 462 462 462 462 462 461 461 461 461 461 461 461 461 461 460
##   [18] 460 460 460 460 460 460 460 460 459 459 459 459 459 459 459 459 458
##   [35] 458 458 458 458 458 458 458 458 457 433 432 432 431 431 430 430 430
##   [52] 429 428 428 428 428 428 428 428 428 428 427 427 427 427 427 427 427
##   [69] 427 427 426 426 426 426 426 426 426 426 426 425 425 425 425 425 425
##   [86] 425 425 425 424 424 424 424 424 424 424 424 424 423 423 423 423 423
##  [103] 423 423 423 423 422 422 422 422 422 422 422 422 422 421 421 421 421
##  [120] 421 421 421 421 421 420 420 420 420 420 420 420 420 420 419 419 419
##  [137] 419 419 419 419 419 418 418 418 418 418 418 417 417 417 417 417 417
##  [154] 417 416 416 416 416 416 416 415 415 415 415 415 415 415 415 415 414
##  [171] 414 414 414 414 414 414 414 414 413 413 413 413 413 413 413 413 413
##  [188] 412 412 412 412 412 412 412 412 412 411 411 411 411 411 411 411 411
##  [205] 411 410 410 410 410 410 410 410 410 410 409 409 409 409 409 409 409
##  [222] 409 408 408 408 408 408 408 408 408 408 408 407 407 407 407 407 407
##  [239] 407 407 406 406 406 406 406 406 406 406 406 405 381 380 380 379 379
##  [256] 378 378 378 377 376 376 376 376 376 376 376 376 376 375 375 375 375
##  [273] 375 375 375 375 375 374 374 374 374 374 374 374 374 374 373 373 373
##  [290] 373 373 373 373 373 373 372 372 372 372 372 372 372 372 372 371 371
##  [307] 371 371 371 371 371 371 371 370 370 370 370 370 370 370 370 369 369
##  [324] 369 369 369 369 369 369 369 368 368 368 368 368 368 367 367 367 367
##  [341] 367 367 366 366 366 366 366 366 366 365 365 365 365 365 365 365 365
##  [358] 365 364 364 364 364 364 364 364 364 364 363 363 363 363 363 363 363
##  [375] 363 363 362 362 362 362 362 362 362 362 362 361 361 361 361 361 361
##  [392] 361 361 361 360 360 360 360 360 360 360 360 360 359 359 359 359 359
##  [409] 359 359 359 359 358 358 358 358 358 358 358 358 358 357 357 357 357
##  [426] 357 357 357 357 357 356 356 356 356 356 356 356 356 356 355 355 355
##  [443] 355 355 355 355 355 355 354 354 354 354 354 354 354 354 353 329 328
##  [460] 328 327 327 326 326 326 326 325 325 325 325 325 325 325 325 325 324
##  [477] 324 324 324 324 324 324 324 324 323 323 323 323 323 323 323 323 323
##  [494] 322 322 322 322 322 322 322 322 322 321 321 321 321 321 321 321 321
##  [511] 321 320 320 320 320 320 320 320 320 320 319 319 319 319 319 319 319
##  [528] 319 319 318 318 318 318 318 318 318 318 318 317 317 317 317 317 317
##  [545] 317 317 316 316 316 316 316 316 316 316 315 315 315 315 315 315 314
##  [562] 314 314 314 314 314 313 313 313 313 313 313 313 312 312 312 312 312
##  [579] 312 312 312 312 311 311 311 311 311 311 311 311 311 310 310 310 310
##  [596] 310 310 310 310 310 309 309 309 309 309 309 309 309 309 308 308 308
##  [613] 308 308 308 308 308 308 307 307 307 307 307 307 307 307 307 306 306
##  [630] 306 306 306 306 306 306 306 305 305 305 305 305 305 305 305 305 304
##  [647] 304 304 304 304 304 304 304 304 303 303 303 303 303 303 303 303 302
##  [664] 276 275 275 274 274 273 273 273 273 272 272 272 272 272 272 272 272
##  [681] 272 271 271 271 271 271 271 271 271 271 270 270 270 270 270 270 270
##  [698] 270 270 269 269 269 269 269 269 269 269 269 268 268 268 268 268 268
##  [715] 268 268 267 267 267 267 267 266 266 266 266 266 265 265 265 265 265
##  [732] 265 265 265 265 264 264 264 264 264 264 264 264 264 263 263 263 263
##  [749] 263 263 263 263 263 262 262 262 262 262 262 262 262 262 261 261 261
##  [766] 261 261 261 261 261 261 260 260 260 260 260 260 260 260 260 259 259
##  [783] 259 259 259 259 259 259 258 258 258 258 258 258 258 257 257 257 257
##  [800] 257 256 256 256 256 256 256 256 255 255 255 255 255 255 255 255 255
##  [817] 254 254 254 254 254 254 254 254 254 253 253 253 253 253 253 253 253
##  [834] 252 252 252 252 252 252 252 252 252 252 251 251 251 251 251 251 251
##  [851] 251 251 250 250 250 250 250 250 250 250 249 249 249 249 249 248 248
##  [868] 248 248 248 224 223 223 222 222 221 221 221 221 220 220 220 220 220
##  [885] 220 220 220 220 219 219 219 219 219 219 219 219 219 218 218 218 218
##  [902] 218 218 218 218 218 217 217 217 217 217 217 217 217 217 216 216 216
##  [919] 216 216 216 216 216 216 215 215 215 215 215 215 215 215 215 214 214
##  [936] 214 214 214 214 214 214 214 213 213 213 213 213 213 213 213 213 212
##  [953] 212 212 212 212 212 212 212 212 211 211 211 211 211 211 211 211 210
##  [970] 210 210 210 210 210 210 209 209 209 209 209 209 208 208 208 208 208
##  [987] 208 207 207 207 207 207 207 207 207 207 206 206 206 206 206 206 206
## [1004] 206 206 205 205 205 205 205 205 205 205 205 204 204 204 204 204 204
## [1021] 204 204 204 203 203 203 203 203 203 203 203 203 202 202 202 202 202
## [1038] 202 202 201 201 201 201 201 201 201 201 201 201 201 200 200 200 200
## [1055] 200 200 200 200 200 199 199 199 199 199 199 199 199 199 198 198 198
## [1072] 198 198 198 197 197 197 172 171 171 170 170 169 169 169 169 168 168
## [1089] 168 168 168 168 168 168 168 167 167 167 167 167 167 167 167 167 166
## [1106] 166 166 166 166 166 166 166 166 165 165 165 165 165 165 165 165 165
## [1123] 164 164 164 164 164 164 164 164 164 163 163 163 163 163 163 163 163
## [1140] 163 162 162 162 162 162 162 162 162 162 161 161 161 161 161 161 161
## [1157] 161 161 160 160 160 160 160 160 160 160 160 159 159 159 159 159 159
## [1174] 159 159 159 158 158 158 158 158 158 157 157 157 157 157 156 156 156
## [1191] 156 156 156 156 155 155 155 155 155 155 155 155 155 154 154 154 154
## [1208] 154 154 154 154 154 153 153 153 153 153 153 153 153 153 152 152 152
## [1225] 152 152 152 152 152 152 151 151 151 151 151 151 151 151 151 150 150
## [1242] 150 150 150 150 150 150 149 149 149 149 149 149 149 149 149 149 148
## [1259] 148 148 148 148 148 148 148 148 147 147 147 147 147 147 147 147 146
## [1276] 146 146 146 146 146 146 146 145 145 120 119 119 118 118 117 117 117
## [1293] 117 116 116 116 116 116 116 116 116 115 115 115 115 115 115 115 115
## [1310] 114 114 114 114 114 114 114 114 113 113 113 113 113 113 113 113 112
## [1327] 112 112 112 112 112 112 112 111 111 111 111 111 111 111 110 110 110
## [1344] 110 110 110 110 110 109 109 109 109 109 109 109 109 108 108 108 108
## [1361] 108 108 108 107 107 107 107 107 107 107 107 106 106 106 106 106 106
## [1378] 106 106 105 105 105 105 105 105 105 105 104 104 104 104 104 104 104
## [1395] 104 103 103 103 103 103 103 103 103 102 102 102 102 102 102 102 102
## [1412] 101 101 101 101 101 101 101 101 100 100 100 100 100 100 100 100  99
## [1429]  99  99  99  99  99  99  99  98  98  98  98  98  98  97  97  97  97
## [1446]  97  97  97  96  96  96  96  96  96  96  96  95  95  95  95  95  95
## [1463]  95  95  94  94  94  94  94  94  94  94  93  93  93  93  93  93  93
## [1480]  92  68  67  66  66  65  65  64  64  64  64  63  63  63  63  63  63
## [1497]  63  63  62  62  62  62  62  62  62  62  61  61  61  61  61  61  61
## [1514]  61  60  60  60  60  60  60  60  60  59  59  59  59  59  59  59  59
## [1531]  58  58  58  58  58  58  58  58  57  57  57  57  57  57  57  57  56
## [1548]  56  56  56  56  56  56  56  55  55  55  55  55  55  55  54  54  54
## [1565]  54  53  53  53  53  53  52  52  52  52  52  52  52  52  51  51  51
## [1582]  51  51  51  51  51  50  50  50  50  50  50  50  50  49  49  49  49
## [1599]  49  49  49  49  48  48  48  48  48  48  48  48  47  47  47  47  47
## [1616]  47  47  47  46  46  46  46  46  46  46  46  45  45  45  45  45  45
## [1633]  45  45  44  44  44  44  44  44  44  44  43  43  43  43  43  43  43
## [1650]  43  42  42  42  42  42  42  42  41  41  41  41  41  41  41  41  40
## [1667]  15  14  14  13  13  12  12  12  12  11  11  11  11  11  11  11  11
## [1684]  10  10  10  10  10  10  10  10   9   9   9   9   9   9   9   9   8
## [1701]   8   8   8   8   8   8   8   7   7   7   7   7   7   7   7   6   6
## [1718]   6   6   6   6   6   6   5   5   5   5   5   5   5   5   4   4   4
## [1735]   4   4   4   4   4   3   3   3   3   3   3   3   3   2   2   2   2
## [1752]   2   2   2   2   1   1   1

#create a design matrix 
Teams <- sort(as.character(unique(c(as.character(afl_bookies$Home.Team)))))

#Defining the number of things
nTeams <- length(Teams)
nWeeks <- max(afl_bookies$week)
n <- nrow(afl_bookies)

#Defining the design matrix
x <- matrix(0, nrow = dim(afl_bookies)[1], ncol = length(Teams))
for (i in 1:dim(afl_bookies)[1]) {
  x[i, which(as.character(afl_bookies[i,"home"]) == Teams)] <- (1)
  x[i, which(as.character(afl_bookies[i,"away"]) == Teams)] <- (-1)
} 


model.string <-"
model { 
for (i in 1:n) {
y[i] ~ dnorm(mu[i], tauGame)
mu[i] <- alpha + inprod(theta[w[i],],x[i,])
}
for (j in 1:nTeams){
theta[1,j] ~ dnorm(0, tauSeason)
}
for (www in 2:nWeeks) {  
for (j in 1:nTeams) {
theta[www,j] ~ dnorm(gammaWeek*theta[www-1,j], tauWeek)
}
}
alpha ~ dnorm(0,0.0001)
tauGame ~ dunif(0,1000) #uncertainty in outcome for each game
tauWeek ~ dunif(0,1000) 
tauSeason ~ dunif(0,1000) #variance parameter for the first week of the season
gammaWeek ~ dunif(0,1.5)
}
"
model.spec<-textConnection(model.string)

library(rjags)
n.chains <- 3 
n.adapt <- n.update <- n.draws <- 1000

posteriorDraws = c('alpha','theta')
thin <- 5
jags <- jags.model(model.spec,
                   data = list('y' = y,'x' = x, 'w' = w, 'n' = n,'nTeams' = nTeams,'nWeeks' = nWeeks), 
                   n.chains = n.chains, n.adapt = n.adapt)

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1077
##    Unobserved stochastic nodes: 9002
##    Total graph size: 54294
## 
## Initializing model

update(jags, n.update)
z <- jags.samples(jags, posteriorDraws, n.draws, thin = thin)

colours <- c("#7fc97f", "#beaed4", "#fdc086")
hfas <- data.frame(round(z$alpha[,,], 3))  %>% mutate(draw = 1:n())
hfas %>% ggplot(aes(draw, X1)) +
  geom_line(colour = colours[1]) + 
  geom_line(data = hfas, aes(draw, X2), colour = colours[2]) + 
  geom_line(data = hfas, aes(draw, X3), colour = colours[3]) + 
  xlab("Draw") + 
  ggtitle("Home advantage (logit scale)") + 
  ylab("") + 
  theme_bw()

There you go, pretty cool!

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)