State Space AFL

April 30, 2018
By

(This article was first published on Analysis of AFL, and kindly contributed to R-bloggers)

Ever read a post and went damn! I really wonder how that would work for AFL .

Well that was me a couple of weeks ago reading this fantastic post and as most people know a good post is a fantastic post when it is reproducible.

library(tidyverse) 
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.8.0     v stringr 1.3.0
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
library(gsheet)
library(stringr)
library(knitr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
logit <- function(p) { 
  out <- log(p/(1 - p))
  return(out)
}

url<-"https://docs.google.com/spreadsheets/d/1U95IzGYJGOzQgLZVmUk6PPsGPLJoyJ2-Ry2dEY3sU3A/edit?usp=sharing"
afl_bookies<-read.csv(text=gsheet2text(url, format='csv'), stringsAsFactors=FALSE)
## No encoding supplied: defaulting to UTF-8.
names(afl_bookies)
##  [1] "date"                    "Kick.Off..local."       
##  [3] "Home.Team"               "Away.Team"              
##  [5] "Venue"                   "Home.Score"             
##  [7] "Away.Score"              "Play.Off.Game."         
##  [9] "Home.Goals"              "Home.Behinds"           
## [11] "Away.Goals"              "Away.Behinds"           
## [13] "Home.Odds"               "Away.Odds"              
## [15] "Bookmakers.Surveyed"     "Home.Odds.Open"         
## [17] "Home.Odds.Min"           "Home.Odds.Max"          
## [19] "Home.Odds.Close"         "Away.Odds.Open"         
## [21] "Away.Odds.Min"           "Away.Odds.Max"          
## [23] "Away.Odds.Close"         "Home.Line.Open"         
## [25] "Home.Line.Min"           "Home.Line.Max"          
## [27] "Home.Line.Close"         "Away.Line.Open"         
## [29] "Away.Line.Min"           "Away.Line.Max"          
## [31] "Away.Line.Close"         "Home.Line.Odds.Open"    
## [33] "Home.Line.Odds.Min"      "Home.Line.Odds.Max"     
## [35] "Home.Line.Odds.Close"    "Away.Line.Odds.Open"    
## [37] "Away.Line.Odds.Min"      "Away.Line.Odds.Max"     
## [39] "Away.Line.Odds.Close"    "Total.Score.Open"       
## [41] "Total.Score.Min"         "Total.Score.Max"        
## [43] "Total.Score.Close"       "Total.Score.Over.Open"  
## [45] "Total.Score.Over.Min"    "Total.Score.Over.Max"   
## [47] "Total.Score.Over.Close"  "Total.Score.Under.Open" 
## [49] "Total.Score.Under.Min"   "Total.Score.Under.Max"  
## [51] "Total.Score.Under.Close"
colnames(afl_bookies)[1] <- "date"
afl_bookies$date<-dmy(afl_bookies$date)

afl_bookies$overround<-(1/afl_bookies$Home.Odds.Close) + (1/afl_bookies$Away.Odds.Close)
summary(afl_bookies$overround)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.012   1.024   1.026   1.027   1.028   1.063     681
plot(afl_bookies$overround)

plot(afl_bookies$overround-1)

qplot(afl_bookies$overround,geom="histogram", bins=100)
## Warning: Removed 681 rows containing non-finite values (stat_bin).

afl_bookies$overround<-afl_bookies$overround-1


afl_bookies$true.home.prob<-1/((afl_bookies$Home.Odds.Close*afl_bookies$overround)+afl_bookies$Home.Odds.Close)
afl_bookies$true.away.prob<-1/((afl_bookies$Away.Odds.Close*afl_bookies$overround)+afl_bookies$Away.Odds.Close)

qplot((afl_bookies$true.home.prob+afl_bookies$true.away.prob),geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 681 rows containing non-finite values (stat_bin).

min.day <- min(afl_bookies$date)
afl_bookies <- afl_bookies %>%
  mutate(day = date - min.day, week = as.numeric(floor(day/7) + 1))

tab.out <- head(afl_bookies, 4) %>% select(date, Home.Team, Away.Team, true.home.prob)
kable(tab.out)
date Home.Team Away.Team true.home.prob
2018-04-22 Brisbane Gold Coast 0.5454545
2018-04-22 North Melbourne Hawthorn 0.3811881
2018-04-21 Fremantle Western Bulldogs 0.6030151
2018-04-21 Port Adelaide Geelong 0.6474820
y <- logit(afl_bookies$true.home.prob)
w <- afl_bookies$week
w
##    [1] 462 462 462 462 462 462 462 461 461 461 461 461 461 461 461 461 460
##   [18] 460 460 460 460 460 460 460 460 459 459 459 459 459 459 459 459 458
##   [35] 458 458 458 458 458 458 458 458 457 433 432 432 431 431 430 430 430
##   [52] 429 428 428 428 428 428 428 428 428 428 427 427 427 427 427 427 427
##   [69] 427 427 426 426 426 426 426 426 426 426 426 425 425 425 425 425 425
##   [86] 425 425 425 424 424 424 424 424 424 424 424 424 423 423 423 423 423
##  [103] 423 423 423 423 422 422 422 422 422 422 422 422 422 421 421 421 421
##  [120] 421 421 421 421 421 420 420 420 420 420 420 420 420 420 419 419 419
##  [137] 419 419 419 419 419 418 418 418 418 418 418 417 417 417 417 417 417
##  [154] 417 416 416 416 416 416 416 415 415 415 415 415 415 415 415 415 414
##  [171] 414 414 414 414 414 414 414 414 413 413 413 413 413 413 413 413 413
##  [188] 412 412 412 412 412 412 412 412 412 411 411 411 411 411 411 411 411
##  [205] 411 410 410 410 410 410 410 410 410 410 409 409 409 409 409 409 409
##  [222] 409 408 408 408 408 408 408 408 408 408 408 407 407 407 407 407 407
##  [239] 407 407 406 406 406 406 406 406 406 406 406 405 381 380 380 379 379
##  [256] 378 378 378 377 376 376 376 376 376 376 376 376 376 375 375 375 375
##  [273] 375 375 375 375 375 374 374 374 374 374 374 374 374 374 373 373 373
##  [290] 373 373 373 373 373 373 372 372 372 372 372 372 372 372 372 371 371
##  [307] 371 371 371 371 371 371 371 370 370 370 370 370 370 370 370 369 369
##  [324] 369 369 369 369 369 369 369 368 368 368 368 368 368 367 367 367 367
##  [341] 367 367 366 366 366 366 366 366 366 365 365 365 365 365 365 365 365
##  [358] 365 364 364 364 364 364 364 364 364 364 363 363 363 363 363 363 363
##  [375] 363 363 362 362 362 362 362 362 362 362 362 361 361 361 361 361 361
##  [392] 361 361 361 360 360 360 360 360 360 360 360 360 359 359 359 359 359
##  [409] 359 359 359 359 358 358 358 358 358 358 358 358 358 357 357 357 357
##  [426] 357 357 357 357 357 356 356 356 356 356 356 356 356 356 355 355 355
##  [443] 355 355 355 355 355 355 354 354 354 354 354 354 354 354 353 329 328
##  [460] 328 327 327 326 326 326 326 325 325 325 325 325 325 325 325 325 324
##  [477] 324 324 324 324 324 324 324 324 323 323 323 323 323 323 323 323 323
##  [494] 322 322 322 322 322 322 322 322 322 321 321 321 321 321 321 321 321
##  [511] 321 320 320 320 320 320 320 320 320 320 319 319 319 319 319 319 319
##  [528] 319 319 318 318 318 318 318 318 318 318 318 317 317 317 317 317 317
##  [545] 317 317 316 316 316 316 316 316 316 316 315 315 315 315 315 315 314
##  [562] 314 314 314 314 314 313 313 313 313 313 313 313 312 312 312 312 312
##  [579] 312 312 312 312 311 311 311 311 311 311 311 311 311 310 310 310 310
##  [596] 310 310 310 310 310 309 309 309 309 309 309 309 309 309 308 308 308
##  [613] 308 308 308 308 308 308 307 307 307 307 307 307 307 307 307 306 306
##  [630] 306 306 306 306 306 306 306 305 305 305 305 305 305 305 305 305 304
##  [647] 304 304 304 304 304 304 304 304 303 303 303 303 303 303 303 303 302
##  [664] 276 275 275 274 274 273 273 273 273 272 272 272 272 272 272 272 272
##  [681] 272 271 271 271 271 271 271 271 271 271 270 270 270 270 270 270 270
##  [698] 270 270 269 269 269 269 269 269 269 269 269 268 268 268 268 268 268
##  [715] 268 268 267 267 267 267 267 266 266 266 266 266 265 265 265 265 265
##  [732] 265 265 265 265 264 264 264 264 264 264 264 264 264 263 263 263 263
##  [749] 263 263 263 263 263 262 262 262 262 262 262 262 262 262 261 261 261
##  [766] 261 261 261 261 261 261 260 260 260 260 260 260 260 260 260 259 259
##  [783] 259 259 259 259 259 259 258 258 258 258 258 258 258 257 257 257 257
##  [800] 257 256 256 256 256 256 256 256 255 255 255 255 255 255 255 255 255
##  [817] 254 254 254 254 254 254 254 254 254 253 253 253 253 253 253 253 253
##  [834] 252 252 252 252 252 252 252 252 252 252 251 251 251 251 251 251 251
##  [851] 251 251 250 250 250 250 250 250 250 250 249 249 249 249 249 248 248
##  [868] 248 248 248 224 223 223 222 222 221 221 221 221 220 220 220 220 220
##  [885] 220 220 220 220 219 219 219 219 219 219 219 219 219 218 218 218 218
##  [902] 218 218 218 218 218 217 217 217 217 217 217 217 217 217 216 216 216
##  [919] 216 216 216 216 216 216 215 215 215 215 215 215 215 215 215 214 214
##  [936] 214 214 214 214 214 214 214 213 213 213 213 213 213 213 213 213 212
##  [953] 212 212 212 212 212 212 212 212 211 211 211 211 211 211 211 211 210
##  [970] 210 210 210 210 210 210 209 209 209 209 209 209 208 208 208 208 208
##  [987] 208 207 207 207 207 207 207 207 207 207 206 206 206 206 206 206 206
## [1004] 206 206 205 205 205 205 205 205 205 205 205 204 204 204 204 204 204
## [1021] 204 204 204 203 203 203 203 203 203 203 203 203 202 202 202 202 202
## [1038] 202 202 201 201 201 201 201 201 201 201 201 201 201 200 200 200 200
## [1055] 200 200 200 200 200 199 199 199 199 199 199 199 199 199 198 198 198
## [1072] 198 198 198 197 197 197 172 171 171 170 170 169 169 169 169 168 168
## [1089] 168 168 168 168 168 168 168 167 167 167 167 167 167 167 167 167 166
## [1106] 166 166 166 166 166 166 166 166 165 165 165 165 165 165 165 165 165
## [1123] 164 164 164 164 164 164 164 164 164 163 163 163 163 163 163 163 163
## [1140] 163 162 162 162 162 162 162 162 162 162 161 161 161 161 161 161 161
## [1157] 161 161 160 160 160 160 160 160 160 160 160 159 159 159 159 159 159
## [1174] 159 159 159 158 158 158 158 158 158 157 157 157 157 157 156 156 156
## [1191] 156 156 156 156 155 155 155 155 155 155 155 155 155 154 154 154 154
## [1208] 154 154 154 154 154 153 153 153 153 153 153 153 153 153 152 152 152
## [1225] 152 152 152 152 152 152 151 151 151 151 151 151 151 151 151 150 150
## [1242] 150 150 150 150 150 150 149 149 149 149 149 149 149 149 149 149 148
## [1259] 148 148 148 148 148 148 148 148 147 147 147 147 147 147 147 147 146
## [1276] 146 146 146 146 146 146 146 145 145 120 119 119 118 118 117 117 117
## [1293] 117 116 116 116 116 116 116 116 116 115 115 115 115 115 115 115 115
## [1310] 114 114 114 114 114 114 114 114 113 113 113 113 113 113 113 113 112
## [1327] 112 112 112 112 112 112 112 111 111 111 111 111 111 111 110 110 110
## [1344] 110 110 110 110 110 109 109 109 109 109 109 109 109 108 108 108 108
## [1361] 108 108 108 107 107 107 107 107 107 107 107 106 106 106 106 106 106
## [1378] 106 106 105 105 105 105 105 105 105 105 104 104 104 104 104 104 104
## [1395] 104 103 103 103 103 103 103 103 103 102 102 102 102 102 102 102 102
## [1412] 101 101 101 101 101 101 101 101 100 100 100 100 100 100 100 100  99
## [1429]  99  99  99  99  99  99  99  98  98  98  98  98  98  97  97  97  97
## [1446]  97  97  97  96  96  96  96  96  96  96  96  95  95  95  95  95  95
## [1463]  95  95  94  94  94  94  94  94  94  94  93  93  93  93  93  93  93
## [1480]  92  68  67  66  66  65  65  64  64  64  64  63  63  63  63  63  63
## [1497]  63  63  62  62  62  62  62  62  62  62  61  61  61  61  61  61  61
## [1514]  61  60  60  60  60  60  60  60  60  59  59  59  59  59  59  59  59
## [1531]  58  58  58  58  58  58  58  58  57  57  57  57  57  57  57  57  56
## [1548]  56  56  56  56  56  56  56  55  55  55  55  55  55  55  54  54  54
## [1565]  54  53  53  53  53  53  52  52  52  52  52  52  52  52  51  51  51
## [1582]  51  51  51  51  51  50  50  50  50  50  50  50  50  49  49  49  49
## [1599]  49  49  49  49  48  48  48  48  48  48  48  48  47  47  47  47  47
## [1616]  47  47  47  46  46  46  46  46  46  46  46  45  45  45  45  45  45
## [1633]  45  45  44  44  44  44  44  44  44  44  43  43  43  43  43  43  43
## [1650]  43  42  42  42  42  42  42  42  41  41  41  41  41  41  41  41  40
## [1667]  15  14  14  13  13  12  12  12  12  11  11  11  11  11  11  11  11
## [1684]  10  10  10  10  10  10  10  10   9   9   9   9   9   9   9   9   8
## [1701]   8   8   8   8   8   8   8   7   7   7   7   7   7   7   7   6   6
## [1718]   6   6   6   6   6   6   5   5   5   5   5   5   5   5   4   4   4
## [1735]   4   4   4   4   4   3   3   3   3   3   3   3   3   2   2   2   2
## [1752]   2   2   2   2   1   1   1
#create a design matrix 
Teams <- sort(as.character(unique(c(as.character(afl_bookies$Home.Team)))))

#Defining the number of things
nTeams <- length(Teams)
nWeeks <- max(afl_bookies$week)
n <- nrow(afl_bookies)

#Defining the design matrix
x <- matrix(0, nrow = dim(afl_bookies)[1], ncol = length(Teams))
for (i in 1:dim(afl_bookies)[1]) {
  x[i, which(as.character(afl_bookies[i,"home"]) == Teams)] <- (1)
  x[i, which(as.character(afl_bookies[i,"away"]) == Teams)] <- (-1)
} 


model.string <-"
model { 
for (i in 1:n) {
y[i] ~ dnorm(mu[i], tauGame)
mu[i] <- alpha + inprod(theta[w[i],],x[i,])
}
for (j in 1:nTeams){
theta[1,j] ~ dnorm(0, tauSeason)
}
for (www in 2:nWeeks) {  
for (j in 1:nTeams) {
theta[www,j] ~ dnorm(gammaWeek*theta[www-1,j], tauWeek)
}
}
alpha ~ dnorm(0,0.0001)
tauGame ~ dunif(0,1000) #uncertainty in outcome for each game
tauWeek ~ dunif(0,1000) 
tauSeason ~ dunif(0,1000) #variance parameter for the first week of the season
gammaWeek ~ dunif(0,1.5)
}
"
model.spec<-textConnection(model.string)

library(rjags)
n.chains <- 3 
n.adapt <- n.update <- n.draws <- 1000

posteriorDraws = c('alpha','theta')
thin <- 5
jags <- jags.model(model.spec,
                   data = list('y' = y,'x' = x, 'w' = w, 'n' = n,'nTeams' = nTeams,'nWeeks' = nWeeks), 
                   n.chains = n.chains, n.adapt = n.adapt)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1077
##    Unobserved stochastic nodes: 9002
##    Total graph size: 54294
## 
## Initializing model
update(jags, n.update)
z <- jags.samples(jags, posteriorDraws, n.draws, thin = thin)

colours <- c("#7fc97f", "#beaed4", "#fdc086")
hfas <- data.frame(round(z$alpha[,,], 3))  %>% mutate(draw = 1:n())
hfas %>% ggplot(aes(draw, X1)) +
  geom_line(colour = colours[1]) + 
  geom_line(data = hfas, aes(draw, X2), colour = colours[2]) + 
  geom_line(data = hfas, aes(draw, X3), colour = colours[3]) + 
  xlab("Draw") + 
  ggtitle("Home advantage (logit scale)") + 
  ylab("") + 
  theme_bw()

There you go, pretty cool!

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.

Search R-bloggers

Sponsors

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)