Time Series Forecasting: The BIST Banks Index

[This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

The Turkish Banking sector has always been considered the decisive indicator of the Turkish economy. After the transition to the Orthodox monetary policies, the Central Bank’s interest rate has been rising to 50% in the short term. This has instituted confidence, especially for foreign investors.

This confidence has lifted the banking index (XBANK) year to date. I wonder about the remainder of the year. So, we will predict the next 8 months of the index in this article. The explanatory variable we are going to use is one-week repo rates in Turkey.

library(tidyverse)
library(tidymodels)
library(tidyquant)
library(timetk)
library(readxl)

#XBANK index data
df_xbank <- 
  tq_get("XBANK.IS", to = "2024-05-01") %>% 
  tq_transmute(select = close,
               mutate_fun = to.monthly,
               col_rename = "xbank") %>% 
  mutate(date = as.Date(date))
  

#TCMB(Central Bank of the Republic of Turkey) One-Week Repo Rate
df_tcmb_rates <- 
  read_csv("https://raw.githubusercontent.com/mesdi/blog/main/tcmb_repo.csv") %>% 
  janitor::clean_names() %>% 
  select(date = release_date, tcmb_rates = actual) %>% 
  mutate(date = 
           #removing parentheses and the text within
           case_when(str_detect(date," \\(.*\\)") ~ str_remove(date," \\(.*\\)"), 
                     TRUE ~ date) %>% 
           parse_date(date, format = "%b %d, %Y") %>% 
           #adding one month
           floor_date("month") %m+% months(1),
         tcmb_rates = str_remove(tcmb_rates, "%") %>% as.numeric()) %>%
  #makes regular time series by filling the time gaps
  pad_by_time(date, .by = "month") %>% 
  tidyr::fill(tcmb_rates, .direction = "up") %>% 
  #mutate(across(tcmb_rates, .fns = \(x) ts_impute_vec(x, period = 1))) %>% 
  drop_na()

#Merging all the datasets
df_merged <- 
  df_tcmb_rates %>% 
  left_join(df_xbank) %>% 
  drop_na()

Once we build our data set we can pass the modeling phase. We will use many models to find the best suit for the data. To do that we will tune and evaluate all the models to their related parameters and rank the accuracy results in a table.

#Modeling
library(tidymodels)
library(modeltime)

#Split Data 
library(timetk)

split <- 
  df_merged %>% 
  time_series_split(assess = "1 year", 
                    cumulative = TRUE)

df_train <- training(split)
df_test <- testing(split)

#Bootstraping for tuning
set.seed(12345)
df_folds <- bootstraps(df_train, 
                       strata = xbank,
                       times = 100)


#Model 1: auto_arima 
#(https://business-science.github.io/modeltime/reference/arima_reg.html)
model_arima_reg <- 
  arima_reg() %>%
  set_engine(engine = "auto_arima")

#Model 2: exp_smoothing via ets
#(https://business-science.github.io/modeltime/reference/exp_smoothing.html)
model_ets <- 
  exp_smoothing() %>%
  set_engine(engine = "ets") 

#Model 3: prophet_reg
#(https://business-science.github.io/modeltime/reference/prophet_reg.html)
model_prophet <- 
  prophet_reg(changepoint_num = tune()) %>%
  set_engine(engine = "prophet")


#Model 4: arima_boost 
#(https://business-science.github.io/modeltime/reference/arima_boost.html)
model_arima_boost <- 
  arima_boost(
    min_n = tune(),
    learn_rate = tune(),
    trees = tune()
  ) %>%
  set_engine(engine = "auto_arima_xgboost")


#Model 5: earth ----
#(https://parsnip.tidymodels.org/reference/mars.html)
model_spec_mars <- 
  mars(mode = "regression",
       num_terms = tune(),
       prod_degree = tune(),
       prune_method = tune()) %>%
  set_engine("earth") 


#Building workflow sets
library(workflowsets)

#Workflow set for ARIMA, ETS and Prophet
rec_formula <- 
  recipe(xbank ~ ., data = df_train)  

wflw_formula <- 
  workflow_set(
    preproc = list(formula = rec_formula),
    models = list(ARIMA_reg = model_arima_reg,
                  ETS = model_ets,
                  Prophet = model_prophet)
  )

#Workflow set for Boosted ARIMA
rec_boost <- 
  rec_formula %>% 
  step_timeseries_signature(date) %>%
  step_rm(contains("iso"), 
          contains("minute"),
          contains("hour"),
          contains("am.pm"), 
          contains("xts")) %>%
  step_normalize(contains("index.num"), date_year) %>%
  step_zv(all_predictors()) %>% 
  step_dummy(contains("lbl"), one_hot = TRUE) 

wflw_boost <- 
  workflow_set(
    preproc = list(rec_boost = rec_boost),
    models = list(ARIMA_boost = model_arima_boost)
  )

#Workflow for Mars
rec_mars <- 
  rec_formula %>%
  step_date(date, 
            features = "month", 
            ordinal = FALSE) %>%
  step_mutate(date_num = as.numeric(date)) %>%
  step_normalize(date_num) %>%
  step_rm(date)

wflw_mars <- 
  workflow_set(
    preproc = list(rec_mars = rec_mars),
    models = list(Mars = model_spec_mars)
  )

#Combining all the workflows
all_wflws <- 
  bind_rows(wflw_formula,
            wflw_boost,
            wflw_mars) %>% 
  # Make the workflow ID's a little more simple: 
  mutate(wflow_id = gsub("(rec_boost_)|(formula_)|(rec_mars_)", 
                         "", 
                         wflow_id))

#Tuning and evaluating all the models
grid_ctrl <-
  control_grid(
    save_pred = TRUE,
    parallel_over = "everything",
    save_workflow = TRUE
  )

grid_results <-
  all_wflws %>%
  workflow_map(
    seed = 98765,
    resamples = df_folds,
    grid = 10,
    control = grid_ctrl
  )

#Accuracy table of the grid results
library(gt)

grid_results %>% 
  rank_results(select_best = TRUE, 
               rank_metric = "rsq") %>%
  select(Models = wflow_id, .metric, mean) %>% 
  pivot_wider(names_from = .metric,
              values_from = mean) %>% 
  mutate(across(2:3, round, 2)) %>% 
  gt(id = "my_table") %>% 
  data_color(
    method = "numeric",
    palette = c("red", "green")
  ) %>% 
  cols_align(align = "center", columns = -Models) %>% 
  #separating alignment of column names from cells-alignment
  tab_style(
    style = cell_text(align = "left"), 
    locations = cells_body(
      columns = Models
    )) %>% 
  #separating cell body from each other
  tab_style(
    style = cell_borders(sides = "all", 
                         color = "white",
                         weight = px(12), 
                         style = "solid"),
    locations = cells_body(columns = everything())) %>% 
  #Rounded corners for the entire table
  opt_css(css = "
    #my_table .gt_row {
      border-radius: 30px;
    }
    ") %>% 
  #Changing the column names
  cols_label(
    Models = md("**Models**"),
    rmse = md("**RMSE**"),
    rsq = md("**R-Squared**")
  ) %>% 
  tab_header(title = "Accuracy ranking of the models") %>% 
  opt_table_font(font = "Bricolage Grotesque")

According to the above table, we will choose the MARS model to predict the index. To do that, we will first finalize the model with the best parameters, according to the above accuracy results. The new accuracy results for our testing data look like dropped a little bit, but still fine.

#Finalizing the model with the best parameters
mars_best_param <- 
  grid_results %>%
  extract_workflow_set_result("Mars") %>% 
  select_best(metric = "rsq")


mars_fit_wflw <- 
  grid_results %>% 
  extract_workflow("Mars") %>% 
  finalize_workflow(mars_best_param) %>% 
  fit(df_train)

#Calibrate the model to the testing set
calibration_tbl_mars <- 
  mars_fit_wflw %>%
  modeltime_calibrate(new_data = df_test)

#Accuracy of the finalized model
calibration_tbl_mars %>%
  modeltime_accuracy(metric_set = metric_set(rmse,rsq))

# A tibble: 1 × 5
#  .model_id .model_desc .type   rmse   rsq
#      <int> <chr>       <chr>  <dbl> <dbl>
#1         1 EARTH       Test  53963. 0.849

Now, we can make forecasts with our finalized model. As I built unseen data for forecasting, I chose %50 for one-week repo rates in Turkey intuitively.

#Forecasting

#Refit to full dataset for forcasting
refit_tbl_mars <- 
  calibration_tbl_mars %>%
  modeltime_refit(data = df_merged)

#Future(unseen) data frame
library(tsibble)
library(fable)

date <- 
  df_merged %>% 
  mutate(date = yearmonth(date)) %>% 
  as_tsibble() %>% 
  new_data(8) %>%
  as_tibble() %>% 
  mutate(date = as.Date(date))


df_future <- 
  date %>% 
  mutate(tcmb_rates = c(rep(50,8)))


xbank_fc <- 
  refit_tbl_mars %>% 
  modeltime_forecast(new_data = df_future) %>% 
  select(Date = .index, XBANK = .value) %>% 
  mutate(Price_Index = XBANK / last(df_merged$xbank)*100 %>% 
           round(0),#making 2024 Apr value = 100 to see the changes(%)
         XBANK = round(XBANK, 3),
         Date = yearmonth(Date))

#Making a forecasting table
xbank_fc %>% 
  gt(id = "my_table") %>% 
  data_color(
    method = "numeric",
    palette = c("steelblue", "orange")
  ) %>% 
  cols_align(align = "center", columns = everything()) %>% 
  #separating alignment of column names from cells-alignment
  tab_style(
    style = cell_text(align = "left"), 
    locations = cells_body(
      columns = Date
    )) %>% 
  #separating cell body from each other
  tab_style(
    style = cell_borders(sides = "all", 
                         color = "white",
                         weight = px(12), 
                         style = "solid"),
    locations = cells_body(columns = everything())) %>% 
  #Rounded corners for the entire table
  opt_css(css = "
    #my_table .gt_row {
      border-radius: 30px;
    }
    ") %>% 
  #Changing the column names
  cols_label(
    Date = md("**Date**"),
    XBANK = md("**BIST Banks Index**"),
    Price_Index = md("**XBANK (2024 Apr = 100)**")
  ) %>% 
  tab_header(title = "Forecasting by the end of the year") %>% 
  opt_table_font(font = "Bricolage Grotesque")

According to the above table, the index will continue to lift the trend by the end of the year; especially in May.

Disclaimer: This content is prepared for purely educational purposes, and cannot be considered as investment advice.

To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)