2012 Olympics Swimming - 100m Butterfly Men Finals prediction
Author: Matt Malin
Inspired by mages’ blog post predicting 100m running times, I’ve decided to do some basic modelling (loess and linear models) of the medal-winning times from previous Olympic Men’s 100m Butterfly finals.
Code setup
# Packages used below: XML for scraping the results table, ggplot2 for the
# plot.
library(XML)
library(ggplot2)

# Page listing the historical results for the event.
swimming_path <- "http://www.databasesports.com/olympics/sport/sportevent.htm?sp=SWI&enum=200"

# Download the page and parse its third HTML table, keeping every column as
# character so that the numeric conversions below are explicit.
swimming_data <- readHTMLTable(
  readLines(swimming_path),
  which = 3,
  stringsAsFactors = FALSE)

# due to some potential errors in passing header = TRUE:
# take the column names from the first row by hand, then drop that row.
names(swimming_data) <- unlist(swimming_data[1, ], use.names = FALSE)
swimming_data <- swimming_data[-1, ]

# Entries that cannot be parsed as numbers become NA (with a coercion
# warning); na.omit() then drops every row containing an NA in any column.
swimming_data[["Result"]] <- as.numeric(swimming_data[["Result"]])
swimming_data[["Year"]] <- as.numeric(swimming_data[["Year"]])
swimming_data <- na.omit(swimming_data)
# Predict the result for one medal type in a given year using a loess fit of
# Result on Year.
#
# Arguments:
#   medal_type       Medal to model. Upper-cased before being compared with
#                    the `Medal` column, so "gold" and "GOLD" are equivalent.
#   prediction_year  Year (or vector of years) to predict for.
#   data             Data frame with `Year`, `Result` and `Medal` columns.
#                    Defaults to the global `swimming_data`. If it has a
#                    `type` column, only rows with type "actual" are used.
#
# Returns the numeric vector produced by predict(), one value per year.
loess_prediction <- function(
  medal_type = "GOLD",
  prediction_year = 2012,
  data = swimming_data) {
  medal_type <- toupper(medal_type)

  # swimming_data is later extended with predicted rows tagged in a `type`
  # column; fit only on observed results so predictions never feed back
  # into the model.
  if ("type" %in% names(data)) {
    data <- data[data[["type"]] %in% "actual", , drop = FALSE]
  }
  medal_results <- subset(data, Medal == medal_type)

  # surface = "direct" fits exactly at the requested points, which is what
  # allows predicting for a year outside the range of the observed data.
  swimming_loess <- loess(
    Result ~ Year,
    data = medal_results,
    control = loess.control(surface = "direct"))

  predict(
    swimming_loess,
    data.frame(Year = prediction_year),
    se = FALSE)
}
# Predict the result for one medal type in a given year using a linear model
# of log(Result) on Year, back-transformed to the original scale.
#
# Arguments:
#   medal_type       Medal to model. Upper-cased before being compared with
#                    the `Medal` column, so "gold" and "GOLD" are equivalent.
#   prediction_year  Year (or vector of years) to predict for.
#   data             Data frame with `Year`, `Result` and `Medal` columns.
#                    Defaults to the global `swimming_data`. If it has a
#                    `type` column, only rows with type "actual" are used.
#
# Returns the numeric vector exp(predict(...)), one value per year.
log_lm_prediction <- function(
  medal_type = "GOLD",
  prediction_year = 2012,
  data = swimming_data) {
  medal_type <- toupper(medal_type)

  # swimming_data is later extended with predicted rows tagged in a `type`
  # column; fit only on observed results so predictions never feed back
  # into the model.
  if ("type" %in% names(data)) {
    data <- data[data[["type"]] %in% "actual", , drop = FALSE]
  }
  medal_results <- subset(data, Medal == medal_type)

  swimming_log_lm <- lm(log(Result) ~ Year, data = medal_results)

  # The model is fitted on the log scale, so exponentiate the prediction.
  exp(predict(
    swimming_log_lm,
    data.frame(Year = prediction_year),
    se.fit = FALSE))
}
# Extend the observed results with the 2012 loess predictions for each medal.
# A `type` column tells the two kinds of row apart: "actual" for observed
# results and "loess_prediction" for the predicted ones. The predictions are
# computed while the arguments to rbind() are evaluated, i.e. before
# swimming_data is reassigned.
swimming_data <- rbind(
  data.frame(
    swimming_data[c("Year", "Medal", "Result")],
    type = "actual"),
  data.frame(
    Year = rep(2012, 3),
    Medal = c("GOLD", "SILVER", "BRONZE"),
    Result = c(
      loess_prediction("gold"),
      loess_prediction("silver"),
      loess_prediction("bronze")),
    type = rep("loess_prediction", 3)))
# Plot colours keyed by the values of the `Medal` column, given as RGB
# components on a 0-255 scale.
medal_colours <- c(
  GOLD = rgb(201, 137, 16, maxColorValue = 255),
  SILVER = rgb(168, 168, 168, maxColorValue = 255),
  BRONZE = rgb(150, 90, 56, maxColorValue = 255))
# Scatter plot of Result against Year, coloured and grouped by medal, with a
# loess smoother per medal.
swimming_plot <- ggplot(
  swimming_data,
  aes(
    x = Year,
    y = Result,
    colour = Medal,
    group = Medal)) +
  scale_x_continuous(limits = c(1968, 2012)) +
  geom_point() +
  stat_smooth(
    aes(fill = Medal),
    alpha = 0.25,
    # Fit the smoother to observed results only. This must be a comparison
    # (`==`): written as `type = "actual"` it is passed to subset() as an
    # unused named argument and no rows are filtered out.
    data = subset(swimming_data, type == "actual"),
    fullrange = FALSE,
    method = "loess")

# Apply the medal colours to both the points/lines and the smoother ribbons.
swimming_plot <- swimming_plot +
  scale_fill_manual(values = medal_colours) +
  scale_colour_manual(values = medal_colours) +
  theme_bw()
Predictions
...
[Read more...]