[This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Crude oil futures prices have fluctuated above the point forecast line (XGBoost) year to date.
library(tidyverse)
library(tidymodels)
library(tidyquant)
library(timetk)
library(modeltime)
library(workflowsets)
#Crude Oil Futures(USD) (Index 2014 = 100)
df_crude_oil <-
tq_get("CL=F") %>%
tq_transmute(select = close,
mutate_fun = to.monthly,
col_rename = "crude_oil") %>%
mutate(date = as.Date(date))
#Federal Funds Effective Rate
df_fed_funds <-
tq_get("FEDFUNDS", get = "economic.data") %>%
select(date, fed_funds = price)
#Industrial Production: Mining: Crude Oil (NAICS = 21112) (Index 2014 = 100)
df_crude_oil_production <-
tq_get("IPG21112S", get = "economic.data") %>%
select(date, crude_oil_production = price)
#Merging all the data sets
df_merged <-
df_crude_oil %>%
left_join(df_fed_funds) %>%
left_join(df_crude_oil_production) %>%
drop_na()
#Splitting tha data
df_split <-
df_merged %>%
time_series_split(assess = "1 year",
cumulative = TRUE)
df_train <- training(df_split)
df_test <- testing(df_split)
#Bootstrapping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train,
times = 100)
#Preprocessing
rec_all <-
recipe(crude_oil ~ ., data = df_train) %>%
step_mutate(date_num = as.numeric(date)) %>%
step_rm(date) %>%
step_normalize(all_numeric_predictors())
#Models
#Radial basis function support vector machines (SVMs) via kernlab
mod_svm_rbf <-
svm_rbf(cost = tune(),
rbf_sigma = tune(),
margin = tune()) %>%
set_engine("kernlab") %>%
set_mode("regression")
#Multivariate adaptive regression splines (MARS) via earth
mod_mars <-
mars(num_terms = tune(),
prune_method = tune()) %>%
set_engine("earth", nfold = 10) %>%
set_mode("regression")
#Boosted trees via xgboost
mod_boost_tree <-
boost_tree(mtry = tune(),
trees = tune(),
min_n = tune(),
learn_rate = tune()) %>%
set_engine("xgboost") %>%
set_mode("regression")
#Workflow sets
wflow_all <-
workflow_set(
preproc = list(svm_all = rec_all),
models = list(SVM_rbf = mod_svm_rbf,
MARS = mod_mars,
XGBoost = mod_boost_tree)) %>%
#Making the workflow ID's a little more simple:
mutate(wflow_id = str_remove(wflow_id, "svm_all_"))
#Tuning and evaluating all the models
grid_ctrl <-
control_grid(
save_pred = TRUE,
parallel_over = "everything",
save_workflow = TRUE
)
grid_results <-
wflow_all %>%
workflow_map(
seed = 98765,
resamples = df_folds,
grid = 10,
control = grid_ctrl
)
#Accuracy of the grid results
grid_results %>%
rank_results(select_best = TRUE,
rank_metric = "rsq") %>%
select(Models = wflow_id, .metric, mean)
#Finalizing the model with the best parameters
best_param <-
grid_results %>%
extract_workflow_set_result("XGBoost") %>%
select_best(metric = "rsq")
wflw_fit <-
grid_results %>%
extract_workflow("XGBoost") %>%
finalize_workflow(best_param) %>%
fit(df_train)
#Calibration data
df_cal <-
wflw_fit %>%
modeltime_calibrate(new_data = df_test)
#Predictive intervals for XGBoost
df_cal %>%
modeltime_forecast(actual_data = df_merged %>%
filter(date >= last(date) - months(12)),
new_data = df_test) %>%
plot_modeltime_forecast(.interactive = FALSE,
.legend_show = FALSE,
.line_size = 1,
.color_lab = "",
.title = "Confidence Intervals for Crude Oil Future Prices (WTI)") +
labs(subtitle = "Using XGBoost") +
theme_minimal(base_family = "Bricolage Grotesque",
base_size = 16)
To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.
R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
