Exploring Different Squigglers HGA

June 29, 2018
By

(This article was first published on Analysis of AFL, and kindly contributed to R-bloggers)

library(fitzRoy)
library(tidyverse)
## -- Attaching packages --------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.5
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(mgcv)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
# Historical match results, with two home-team labels rewritten
# ("GWS" and "Footscray") -- presumably so they match the team names used in
# the Squiggle tips that are joined on later.
afltables <- fitzRoy::get_match_results() %>%
  mutate(
    Home.Team = Home.Team %>%
      str_replace("GWS", "Greater Western Sydney") %>%
      str_replace("Footscray", "Western Bulldogs")
  )

# Tips published through the Squiggle API.
tips <- get_squiggle_data("tips")
## Getting data from https://api.squiggle.com.au/?q=tips

# Home-team labels present in the match results after renaming.
unique(afltables[["Home.Team"]])
##  [1] "Fitzroy"                "Collingwood"           
##  [3] "Geelong"                "Sydney"                
##  [5] "Essendon"               "St Kilda"              
##  [7] "Melbourne"              "Carlton"               
##  [9] "Richmond"               "University"            
## [11] "Hawthorn"               "North Melbourne"       
## [13] "Western Bulldogs"       "West Coast"            
## [15] "Brisbane Lions"         "Adelaide"              
## [17] "Fremantle"              "Port Adelaide"         
## [19] "Gold Coast"             "Greater Western Sydney"
# Columns available in the match results ...
colnames(afltables)
##  [1] "Game"         "Date"         "Round"        "Home.Team"   
##  [5] "Home.Goals"   "Home.Behinds" "Home.Points"  "Away.Team"   
##  [9] "Away.Goals"   "Away.Behinds" "Away.Points"  "Venue"       
## [13] "Margin"       "Season"       "Round.Type"   "Round.Number"
# ... and in the tips.
colnames(tips)
##  [1] "venue"       "hteamid"     "tip"         "correct"     "date"       
##  [6] "round"       "ateam"       "bits"        "year"        "confidence" 
## [11] "updated"     "tipteamid"   "gameid"      "ateamid"     "err"        
## [16] "sourceid"    "margin"      "source"      "hconfidence" "hteam"
# Reduce the tip timestamps to calendar dates and parse the match dates, so
# both tables carry a Date column to join on.
tips <- tips %>%
  mutate(date = as.Date(ymd_hms(date)))
afltables <- afltables %>%
  mutate(Date = ymd(Date))

# Attach the match result to every tip, matching on home team and match date.
joined_dataset <- tips %>%
  left_join(afltables, by = c("hteam" = "Home.Team", "date" = "Date"))

# Keep a single tipster (PlusSixOne) and express the tipped margin, the actual
# margin and the confidence from the home team's point of view.
df <- joined_dataset %>%
  filter(source == "PlusSixOne") %>%
  transmute(
    round,
    hteam,
    ateam,
    # percentage -> proportion
    hconfidence = hconfidence / 100,
    # the tipped margin belongs to the tipped team; flip its sign when the
    # tip is for the away side
    squigglehomemargin = if_else(hteam == tip, margin, -margin),
    actualhomemargin = Home.Points - Away.Points,
    correct
  )

# Drop every row with a missing value (e.g. tips with no matching result).
df <- df[complete.cases(df), ]

# Team names as factors.
team_cols <- c("hteam", "ateam")
df[team_cols] <- lapply(df[team_cols], as.factor)
# Calibration check: model the home-win indicator as a smooth function of the
# logit of the tipped home-win probability. A perfectly calibrated tipster
# would give a fitted curve lying on the purple identity line.

# hconfidence is already a proportion (it was divided by 100 when df was
# built), so its logit is log(p / (1 - p)), which is what qlogis() returns.
# Note that this is infinite when p is exactly 0 or 1.
df$logitChance <- qlogis(df$hconfidence)

ft <- gam(I(actualhomemargin > 0) ~ s(logitChance),
          data = df, family = "binomial")

# Fitted probabilities and their standard errors on the response scale.
preds <- predict(ft, type = "response", se.fit = TRUE)

# Draw the curve in order of the x variable, so the line cannot double back
# on itself if the fit happens not to be monotone.
ord <- order(df$hconfidence)
conf_sorted <- df$hconfidence[ord]
fit_sorted <- as.numeric(preds$fit[ord])
se_sorted <- as.numeric(preds$se.fit[ord])

plot(conf_sorted, fit_sorted, col = "red", type = "l",
     xlab = "Tipped home-win probability",
     ylab = "Fitted home-win probability")

# Reference lines: even chance on each axis (both axes run from 0 to 1) and
# perfect calibration (y = x).
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(a = 0, b = 1, col = "purple")

# Band of +/- 2 standard errors around the fit.
lines(conf_sorted, fit_sorted + 2 * se_sorted)
lines(conf_sorted, fit_sorted - 2 * se_sorted)

# predicting winners
# sp = 0.05 fixes the smoothing parameter so the curve is a bit wiggly but not
# silly (the default fit was not monotonically increasing, which is silly).
ft <- gam(I(actualhomemargin > 0) ~ s(hconfidence),
          data = df, family = "binomial", sp = 0.05)

# plot.gam() draws the centred smooth, i.e. without the model intercept.
# Adding the intercept back through `shift` before applying the inverse link
# puts the curve on the probability scale, which is the scale the reference
# lines below are drawn on.
plot(ft, rug = FALSE, shift = coef(ft)[1], trans = binomial()$linkinv)
abline(h = 0.5, col = "blue")
abline(v = 0.5, col = "blue")
abline(a = 0, b = 1, col = "purple")

# predicting margins
ft <- gam(actualhomemargin ~ s(hconfidence), data = df)

# `shift` adds the intercept back to the centred smooth so the y-axis is the
# home margin in points, the same scale as the Squiggle margins overlaid
# below. `residuals` is spelled out in full rather than relying on partial
# argument matching.
plot(ft, rug = FALSE, residuals = TRUE, shift = coef(ft)[1],
     pch = 1, cex = 0.4)
# Reference lines: a zero margin and an even-chance tip.
abline(h = 0, col = "blue")
abline(v = 0.5, col = "blue")

# add squiggle margins to the plot
conf_order <- order(df$hconfidence)
lines(df$hconfidence[conf_order], df$squigglehomemargin[conf_order],
      col = "purple")

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping), statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.

Search R-bloggers

Sponsors

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)