Maths in Sport Script

May 17, 2018
By

(This article was first published on Analysis of AFL, and kindly contributed to R-bloggers)

############################################################################
#BROWNLOW PREDICTION WITH FREE DATA!!!!

        ################
        ### 

##from fitzRoy figures
# ptm <- proc.time()
library(tidyverse)
# Download the player statistics from AFL Tables (via fitzRoy) for every game
# from 1897 up to today.
df <- fitzRoy::get_afltables_stats(
  start_date = "1897-01-01",
  end_date = Sys.Date()
)
names(df)
# Alternative data source: df <- afldata::afldata

# One row per player per game: identifiers plus the stat columns.
player_stats <- dplyr::select(
  df,
  Date, First.name, Surname, Season, Round, Playing.for,
  Kicks:Goal.Assists
)

# Team totals per game: every numeric stat is summed within
# Date/Season/Round/team and comes out named `<stat>_sum`.
team_stats <- player_stats %>%
  group_by(Date, Season, Round, Playing.for) %>%
  summarise_if(is.numeric, funs(sum = sum(.)))

# Attach the team totals to each player row. Round is not a join key, so it
# appears twice in the result as Round.x (player side) and Round.y (team side).
complete_df <- left_join(
  player_stats,
  team_stats,
  by = c("Date", "Season", "Playing.for")
)

# The model also uses the match margin, so build it from the match results.

dataset_scores <- fitzRoy::match_results
names(dataset_scores)

# Put the home and away sides into the same shape (Team / Points) so they can
# be stacked: all home rows first, then all away rows.
dataset_scores1 <- dplyr::select(
  dataset_scores,
  Date, Round, Team = Home.Team, Points = Home.Points, Game
)
dataset_scores2 <- dplyr::select(
  dataset_scores,
  Date, Round, Team = Away.Team, Points = Away.Points, Game
)

df5 <- rbind(dataset_scores1, dataset_scores2)

# Within each game the home row precedes the away row, so diff(Points) is
# away minus home; flipping its sign for the first row gives each team's own
# margin (positive = win).
dataset_margins <- df5 %>%
  arrange(Game) %>%
  group_by(Game) %>%
  mutate(margin = c(-1L, 1L) * diff(Points))
# View(dataset_margins)

# Both sides of the join need Date as a Date.
dataset_margins[["Date"]] <- as.Date(dataset_margins[["Date"]])
complete_df[["Date"]] <- as.Date(complete_df[["Date"]])

complete_df <- left_join(
  complete_df,
  dataset_margins,
  by = c("Date", "Playing.for" = "Team")
)


# Express every player stat as a share of the matching `<stat>_sum` column for
# the same game (presumably the per-team totals joined on earlier in the
# script -- verify against the team_stats step).
complete_df_ratio<-complete_df%>%
  mutate(kick.ratio=Kicks/Kicks_sum,
         Marks.ratio=Marks/Marks_sum,
         handball.ratio=Handballs/Handballs_sum,
         Goals.ratio=Goals/Goals_sum,
         behinds.ratio=Behinds/Behinds_sum,
         hitouts.ratio=Hit.Outs/Hit.Outs_sum,
         tackles.ratio=Tackles/Tackles_sum,
         rebounds.ratio=Rebounds/Rebounds_sum,
         inside50s.ratio=Inside.50s/Inside.50s_sum,
         clearances.ratio=Clearances/Clearances_sum,
         clangers.ratio=Clangers/Clangers_sum,
         freefors.ratio=Frees.For/Frees.For_sum,
         freesagainst.ratio=Frees.Against/Frees.Against_sum,
         Contested.Possessions.ratio=Contested.Possessions/Contested.Possessions_sum,
         Uncontested.Possessions.ratio=Uncontested.Possessions/Uncontested.Possessions_sum,
         contested.marks.ratio=Contested.Marks/Contested.Marks_sum,
         marksinside50.ratio=Marks.Inside.50/Marks.Inside.50_sum,
         one.percenters.ratio=One.Percenters/One.Percenters_sum,
         bounces.ratio=Bounces/Bounces_sum,
         goal.assists.ratio=Goal.Assists/Goal.Assists_sum,
         # disposals are kicks plus handballs, for the player and for the team
         disposals.ratio=(Kicks+Handballs)/(Kicks_sum+Handballs_sum))
# Reorder the columns: identifiers first, then Brownlow.Votes_sum, then the rest.
df<-complete_df_ratio%>%dplyr::select(Date, First.name, Surname, Season, Round.x, Playing.for,-Brownlow.Votes, Brownlow.Votes_sum,everything())
# Intended to move Brownlow.Votes to the last column; the later
# `margin:Brownlow.Votes` selection relies on that column order.
df<-df%>%dplyr::select(-Brownlow.Votes,everything())
# Replace every NA in the whole table with 0. This also covers the NaN produced
# by a 0/0 ratio, because is.na() is TRUE for NaN.
df[is.na(df)] <- 0
# Training data: seasons 2013-2016, with the vote count turned into a factor
# for the ordinal model, restricted to the rounds numbered "1" to "24".
in.sample <- df[df$Season %in% 2013:2016, ] %>%
  mutate(Brownlow.Votes = factor(Brownlow.Votes)) %>%
  filter(Round.x %in% as.character(1:24))

names(in.sample)

# Build a single Player label and keep only the identifiers, the margin, the
# ratio predictors and the response (everything from margin to Brownlow.Votes).
in.sample <- in.sample %>%
  mutate(Player = paste(First.name, Surname)) %>%
  dplyr::select(
    Player, Date, Season, Round.x, Playing.for,
    margin:Brownlow.Votes
  )




library(ordinal)

# Full model: cumulative link (ordinal regression) model of Brownlow.Votes on
# every per-game ratio plus the match margin, fitted to the training data.
fm1<-clm(Brownlow.Votes~ kick.ratio +  handball.ratio +  Marks.ratio +  
           disposals.ratio+  hitouts.ratio+
           freefors.ratio +  freesagainst.ratio +  tackles.ratio +  Goals.ratio +   behinds.ratio + Contested.Possessions.ratio+
           Uncontested.Possessions.ratio +  clangers.ratio +    contested.marks.ratio + marksinside50.ratio +
           clearances.ratio +   rebounds.ratio +    inside50s.ratio +   one.percenters.ratio +  bounces.ratio+
           goal.assists.ratio  +margin, 
         data = in.sample)

# MASS exports its own select(), which masks dplyr's once loaded; presumably
# that is why the script always calls dplyr::select explicitly.
library(MASS)

# Backward stepwise selection starting from the full model; fm2 is the
# reduced model used for prediction further down.
# NOTE(review): `type` is not a documented stepAIC argument; it is forwarded
# through `...` -- confirm whether it has any effect or can be dropped.
fm2<- stepAIC(fm1, direction='backward',type=AIC)

                      ####################
                    ###Get the out.sample

### The 2017 prediction data comes from Footywire (fitzRoy::player_stats),
### which also shows that fitzRoy gives fans access to both popular websites.

names(fitzRoy::player_stats)
df_2017 <- dplyr::filter(fitzRoy::player_stats, Season == 2017)

# One row per player per game: identifiers plus the stat columns CP..T5.
player_stats_out <- dplyr::select(
  df_2017,
  Date, Player, Season, Round, Team,
  CP:T5
)

# Team totals per game: every numeric stat summed, named `<stat>_sum`.
team_stats_out <- player_stats_out %>%
  group_by(Date, Season, Round, Team) %>%
  summarise_if(is.numeric, funs(sum = sum(.)))

# Attach the team totals to each player row. Round is not a join key, so the
# result carries Round.x (player side) and Round.y (team side).
complete_df_out <- left_join(
  player_stats_out,
  team_stats_out,
  by = c("Date", "Season", "Team")
)



# Rebuild the per-team match margins and attach them to the 2017 data.
dataset_scores <- fitzRoy::match_results
names(dataset_scores)

# Home and away sides in the same shape (Team / Points), home rows first.
dataset_scores1 <- dplyr::select(
  dataset_scores,
  Date, Round, Team = Home.Team, Points = Home.Points, Game
)
dataset_scores2 <- dplyr::select(
  dataset_scores,
  Date, Round, Team = Away.Team, Points = Away.Points, Game
)

df5 <- rbind(dataset_scores1, dataset_scores2)

# Within a game the home row comes first, so diff(Points) is away minus home;
# negating it for the first row gives each team its own margin.
dataset_margins <- df5 %>%
  arrange(Game) %>%
  group_by(Game) %>%
  mutate(margin = c(-1L, 1L) * diff(Points))

# Both sides of the join need Date as a Date.
dataset_margins[["Date"]] <- as.Date(dataset_margins[["Date"]])
complete_df_out[["Date"]] <- as.Date(complete_df_out[["Date"]])

# Rename two teams in the match results so that they match the team names
# used in the player data before joining on Team.
dataset_margins <- dataset_margins %>%
  mutate(
    Team = str_replace(Team, "Brisbane Lions", "Brisbane"),
    Team = str_replace(Team, "Footscray", "Western Bulldogs")
  )

complete_df_out <- left_join(
  complete_df_out,
  dataset_margins,
  by = c("Date", "Team")
)

names(complete_df_out)

#### Create the ratios for the prediction data
# Each ratio is the player's count divided by the team's `<stat>_sum` total for
# the same game, under the same names the model was trained on.
#
# Two fixes relative to the first version of this step:
#  * clangers.ratio now uses CG (clangers). In the Footywire columns CL is
#    clearances, so CL / CL_sum fed clearance shares into the clanger predictor.
#  * A 0 / 0 ratio (team total of zero) is set to 0, matching the training
#    data, where every NA/NaN was replaced by 0 before fitting. Without this,
#    those player-games carry NaN predictors into predict().
complete_df_ratio_out <- complete_df_out %>%
  mutate(kick.ratio = K / K_sum,
         Marks.ratio = M / M_sum,
         handball.ratio = HB / HB_sum,
         Goals.ratio = G / G_sum,
         behinds.ratio = B / B_sum,
         hitouts.ratio = HO / HO_sum,
         tackles.ratio = T / T_sum,
         rebounds.ratio = R50 / R50_sum,
         inside50s.ratio = I50 / I50_sum,
         # centre clearances plus stoppage clearances
         clearances.ratio = (CCL + SCL) / (CCL_sum + SCL_sum),
         clangers.ratio = CG / CG_sum,
         freefors.ratio = FF / FF_sum,
         freesagainst.ratio = FA / FA_sum,
         Contested.Possessions.ratio = CP / CP_sum,
         Uncontested.Possessions.ratio = UP / UP_sum,
         contested.marks.ratio = CM / CM_sum,
         marksinside50.ratio = MI5 / MI5_sum,
         one.percenters.ratio = One.Percenters / One.Percenters_sum,
         bounces.ratio = BO / BO_sum,
         goal.assists.ratio = GA / GA_sum,
         disposals.ratio = D / D_sum) %>%
  # is.na() is TRUE for NaN as well, so this covers the 0 / 0 case.
  mutate_at(vars(kick.ratio:disposals.ratio),
            function(x) replace(x, is.na(x), 0))




# Keep only the identifiers, the margin and the ratio predictors.
conforming <- dplyr::select(
  complete_df_ratio_out,
  Player, Date, Season, Round.x, Team, margin,
  kick.ratio:disposals.ratio
)

# Placeholder response column; it is the last column and is dropped again
# immediately below when building the prediction input.
conforming$Brownlow.Votes <- 0
out.sample <- conforming

newdata <- out.sample[, -ncol(out.sample)]

# Predicted probability of each vote count, reshaped to one row per
# player-game and one column per vote count (0, 1, 2, 3).
pre.dict <- predict(fm2, newdata = newdata, type = "prob")
pre.dict.m <- data.frame(matrix(unlist(pre.dict), nrow = nrow(newdata)))
colnames(pre.dict.m) <- paste0("vote.", 0:3)

newdata.pred <- cbind.data.frame(newdata, pre.dict.m)


#### Step 1: Expected votes = sum over vote counts of (count x probability)
newdata.pred$expected.votes <- with(
  newdata.pred,
  vote.1 + 2 * vote.2 + 3 * vote.3
)

#### Join the Match_id back on (it was not carried through the earlier selects)

get_match_ID <- fitzRoy::player_stats

xx <- dplyr::select(get_match_ID, Date, Player, Match_id)

# Look up Match_id by Date and Player, then keep only games played before
# 1 September 2017 (presumably to leave out the finals -- confirm).
newdata.pred <- newdata.pred %>%
  left_join(xx, by = c("Date", "Player")) %>%
  filter(Date < "2017-09-01")


#### Step 2: Total predicted probability of each vote count within every match
sum1 <- setNames(
  aggregate(vote.1 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.1")
)
sum2 <- setNames(
  aggregate(vote.2 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.2")
)
sum3 <- setNames(
  aggregate(vote.3 ~ Match_id, data = newdata.pred, FUN = sum),
  c("Match_id", "sum.vote.3")
)

#### Step 3: Add the per-match totals to the big table, one merge at a time
newdata.pred <- Reduce(
  function(acc, totals) merge(acc, totals, by = "Match_id"),
  list(sum1, sum2, sum3),
  newdata.pred
)

#### Step 4: Standardise each probability by its match total (std1/2/3)
newdata.pred$std.1 <- with(newdata.pred, (sum.vote.1 / vote.1)^-1)
newdata.pred$std.2 <- with(newdata.pred, (sum.vote.2 / vote.2)^-1)
newdata.pred$std.3 <- with(newdata.pred, (sum.vote.3 / vote.3)^-1)


#### Step 5: Expected standardised votes for the game
newdata.pred$exp_std_game_vote <- with(
  newdata.pred,
  std.1 + 2 * std.2 + 3 * std.3
)


#### Step 6: Leaderboard of season totals

# Label each player together with their team, then sum the expected
# standardised votes over all of that player's games.
newdata.pred$PlayerName <- with(newdata.pred, paste(Player, " ", Team))
winners.stdgame <- aggregate(
  exp_std_game_vote ~ PlayerName,
  data = newdata.pred,
  FUN = sum
)

# Highest total first; show the top ten.
ranking <- order(-winners.stdgame$exp_std_game_vote)
winners.stdgame <- winners.stdgame[ranking, ]
rm(ranking)
winners.stdgame[seq_len(10), ]

# proc.time() - ptm

To leave a comment for the author, please follow the link and comment on their blog: Analysis of AFL.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping), statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.

Search R-bloggers

Sponsors

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)