Site icon R-bloggers

Exploring Different Squigglers HGA

[This article was first published on Analysis of AFL, and kindly contributed to R-bloggers]. (You can report an issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
library(fitzRoy)
library(tidyverse)
## -- Attaching packages --------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.5
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
# Fetch historical match results and the Squiggle tipping data.
afltables <- fitzRoy::get_match_results()
tips <- get_squiggle_data("tips")
## Getting data from https://api.squiggle.com.au/?q=tips

# Expand the two home-team names that are abbreviated / historical in the
# match results (presumably so they line up with the names used in `tips`).
afltables <- afltables %>%
  mutate(
    Home.Team = Home.Team %>%
      str_replace("GWS", "Greater Western Sydney") %>%
      str_replace("Footscray", "Western Bulldogs")
  )

# Sanity check: the distinct home-team names after the renaming step.
unique(afltables[["Home.Team"]])
##  [1] "Fitzroy"                "Collingwood"           
##  [3] "Geelong"                "Sydney"                
##  [5] "Essendon"               "St Kilda"              
##  [7] "Melbourne"              "Carlton"               
##  [9] "Richmond"               "University"            
## [11] "Hawthorn"               "North Melbourne"       
## [13] "Western Bulldogs"       "West Coast"            
## [15] "Brisbane Lions"         "Adelaide"              
## [17] "Fremantle"              "Port Adelaide"         
## [19] "Gold Coast"             "Greater Western Sydney"
# Column names of both tables, to pick the join keys and model variables.
colnames(afltables)
##  [1] "Game"         "Date"         "Round"        "Home.Team"   
##  [5] "Home.Goals"   "Home.Behinds" "Home.Points"  "Away.Team"   
##  [9] "Away.Goals"   "Away.Behinds" "Away.Points"  "Venue"       
## [13] "Margin"       "Season"       "Round.Type"   "Round.Number"
colnames(tips)
##  [1] "venue"       "hteamid"     "tip"         "correct"     "date"       
##  [6] "round"       "ateam"       "bits"        "year"        "confidence" 
## [11] "updated"     "tipteamid"   "gameid"      "ateamid"     "err"        
## [16] "sourceid"    "margin"      "source"      "hconfidence" "hteam"
# Parse the tip timestamps and reduce them to calendar dates so they can be
# matched against the match-result dates.
tips <- tips %>%
  mutate(date = as.Date(ymd_hms(date)))

afltables <- afltables %>%
  mutate(Date = ymd(Date))

# Attach the match result to every tip, keyed on home team and match date.
joined_dataset <- tips %>%
  left_join(afltables, by = c("hteam" = "Home.Team", "date" = "Date"))

# Build the modelling frame for a single tipster ("PlusSixOne"):
#   hconfidence        - tipped home-win confidence, rescaled by 1/100
#   squigglehomemargin - tipped margin signed from the home team's side
#   actualhomemargin   - observed home points minus away points
df <- joined_dataset %>%
  filter(source == "PlusSixOne") %>%
  mutate(
    hconfidence = hconfidence / 100,
    actualhomemargin = Home.Points - Away.Points,
    # the tipped margin is for the tipped team; flip its sign when the tip
    # is not the home team
    squigglehomemargin = if_else(hteam == tip, margin, -margin)
  ) %>%
  select(round, hteam, ateam, hconfidence, squigglehomemargin, actualhomemargin, correct)

# Keep only rows where every retained column is present (e.g. tips that
# found no matching result in the join end up with missing margins).
df <- df[complete.cases(df), ]

# Team names as factors.
team_cols <- c("hteam", "ateam")
df[team_cols] <- lapply(df[team_cols], as.factor)
# Calibration check: how often does the home team actually win at each level
# of tipped home-win confidence?
#
# Baseline fit on the raw confidence; it is superseded by the log-odds fit
# below. df$hconfidence is expected on a 0-1 scale (it is divided by 100 when
# df is built).
ft <- gam(I(actualhomemargin > 0) ~ s(hconfidence), data = df, family = "binomial")

# Log-odds of the tipped confidence, log(p / (1 - p)). The confidence is
# clamped slightly inside (0, 1) so that a tip of exactly 0% or 100% does not
# produce an infinite predictor.
df$logitChance <- qlogis(pmin(pmax(df$hconfidence, 1e-6), 1 - 1e-6))

ft <- gam(I(actualhomemargin > 0) ~ s(logitChance), data = df, family = "binomial")

# Fitted win probabilities with standard errors, on the response scale.
preds <- predict(ft, type = "response", se.fit = TRUE)

# Order by the x variable (not by the fitted value) so the curve and its
# bands are drawn left to right even where the fit is not monotone.
confOrder <- order(df$hconfidence)
confX <- df$hconfidence[confOrder]
fitY <- as.numeric(preds$fit)[confOrder]
seY <- as.numeric(preds$se.fit)[confOrder]

plot(confX, fitY,
     col = "red", type = "l", ylim = c(0, 1),
     xlab = "Tipped home-win confidence",
     ylab = "Fitted probability of a home win")

# Reference lines: 50% on both axes, and the identity line (perfect
# calibration).
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(c(0, 1), col = "purple")

# Approximate +/- 2 standard-error band around the fitted curve.
lines(confX, fitY + 2 * seY)
lines(confX, fitY - 2 * seY)

# predicting winners
ft <- gam(I(actualhomemargin > 0) ~ s(hconfidence), data = df, family = "binomial", sp = 0.05)
# the 0.05 was to make it a bit wiggly but not too silly (the default was not
# monotonically increasing, which is silly)

# plot.gam draws the centred smooth on the link scale. Adding the intercept
# back with `shift` before applying the inverse link puts the curve on the
# probability scale, which is what the reference lines below assume.
plot(ft, rug = FALSE, shift = coef(ft)[1], trans = binomial()$linkinv)
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
# identity line: fitted win probability equals tipped confidence
abline(c(0, 1), col = "purple")

# predicting margins
ft <- gam(actualhomemargin ~ s(hconfidence), data = df)

# plot.gam draws the centred smooth; `shift` adds the intercept back so the
# y-axis is the expected home margin and is directly comparable with the raw
# Squiggle margins overlaid below. `residuals` is spelled out in full rather
# than relying on partial argument matching.
plot(ft, rug = FALSE, residuals = TRUE, shift = coef(ft)[1], pch = 1, cex = 0.4)

# Reference lines: a zero margin (break-even) and 50% confidence.
abline(h = 0, col = "blue")
abline(v = 0.5, col = "blue")

# add squiggle margins to the plot
confSort <- sort(df$hconfidence, index.return = TRUE)
lines(confSort$x, df$squigglehomemargin[confSort$ix], col = "purple")

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.