JAGS Simulation with Multivariate State-Space Model: The G7 on Food Security

[This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

The 49th G7 summit was held recently in Japan. Ukraine was one of the most critical issues at the meeting; most of the session topic was related to problems stemming from Russia’s invasion of Ukraine. One of the problems, aforementioned is food security. Because of the war, energy prices have been up. And that has stimulated food inflation. Of course, that is not the only reason but one of the important ones, like the disruption of the food supply chain.

In this article, we will model the food and energy inflation of the G7 countries since the 1990s and make their 5-year projection. We will use the multivariate state-space model with JAGS simulation for that purpose.

First, we will build our data set for the model. We will use food and energy annual CPI rates with the 2015 base-year, from OECD.

library(tidyverse)
library(rjags)
library(R2jags)
library(ggalt)
library(systemfonts)
library(showtext)

#Loading and wrangling the dataset
df <- read_csv("https://raw.githubusercontent.com/mesdi/blog/main/g7_cpi.csv")

df_tidy <- 
  df %>% 
  pivot_wider(names_from = subject,
              values_from = value) %>% 
  janitor::clean_names() 
  


#Matrix data for jags model  
df_marss <- 
  df_tidy %>% 
  pivot_longer(c("food","enrg"),
               names_to = "subject") %>% 
  pivot_wider(names_from = time,
              values_from = value) %>% 
  column_to_rownames("subject") %>%
  #Unseen data for forecasting
  add_column("2023" = NA,
             "2024" = NA,
             "2025" = NA,
             "2026" = NA,
             "2027" = NA) %>% 
  as.matrix()

We will add multiple hidden states to our model.

#MARSS with m hidden states 
jagsscript <- cat("
model {  
   # process model priors
   inv.q~dgamma(0.001,0.001);
   q <- 1/inv.q; # one q
   for(i in 1:n) {
      u[i] ~ dnorm(0, 0.01); 
      X0[i] ~ dnorm(Y1[i],0.001); # initial states
   }
   # process model likelihood
   for(i in 1:n) {
     EX[i,1] <- X0[i] + u[i];
     X[i,1] ~ dnorm(EX[i,1], inv.q);
   }
   for(t in 2:N) {
      for(i in 1:n) {
         EX[i,t] <- X[i,t-1] + u[i];
         X[i,t] ~ dnorm(EX[i,t], inv.q);
      }
   }

   # observation model priors
   for(i in 1:n) { # The r's are different by site
     inv.r[i]~dgamma(0.001,0.001);
     r[i] <- 1/inv.r[i];
   }
   # observation model likelihood
   for(t in 1:N) {
     for(i in 1:n) {
       EY[i,t] <- X[i,t]
       Y[i,t] ~ dnorm(EY[i,t], inv.r[i]);
     }
   }
}  

", 
                  file = "marss-jags2.txt")



jags.data <- list(Y = df_marss, 
                  n = nrow(df_marss), 
                  N = ncol(df_marss), 
                  Y1 = df_marss[,1])

jags.params <- c("EY", "u", "q", "r")

model.loc <- "marss-jags2.txt"  # name of the txt file

#Modeling
set.seed(123)
mod_marss2 <- R2jags::jags(jags.data, 
                           parameters.to.save = jags.params, 
                           model.file = model.loc, 
                           n.chains = 3, 
                           n.burnin = 5000, 
                           n.thin = 1, 
                           n.iter = 10000, 
                           DIC = TRUE)


EY <- mod_marss2$BUGSoutput$sims.list$EY
n <- nrow(df_marss)
N <- ncol(df_marss)
tmp_list <- list()

for (i in 1:n) {
  tmp <- data.frame(n = paste0("Y", i), 
                    x = 1:N, 
                    ey = apply(EY[, i, , drop = FALSE], 
                               3, 
                               median), 
                    ey.low = apply(EY[, i, , drop = FALSE], 
                                   3, 
                                   quantile, 
                                   probs = 0.25), 
                    ey.up = apply(EY[, i, , drop = FALSE], 
                                  3, 
                                  quantile, 
                                  probs = 0.75), 
                    y = df_marss[i,])
  
  
  tmp_list[i] <- list(tmp) 
  
  df_jags <- 
    tmp_list %>% map(rownames_to_column,"time") %>% 
    do.call(what=rbind) %>% 
    as_tibble() 
  

}

After we’ve built our fitted data frame, we can compare the performance of our model to the datasets on a faceted graph.

#Comparing the MARSS predictions with actual observations 

#Google font setting  
font_add_google("Lato","lato")  
showtext_auto()

ggplot(data = 
         df_jags %>% 
         drop_na(), #removes the unseen data points
       aes(x = time, 
           y = ey,
           group = n)) + 
  geom_line(color = "blue",
            size = 1.2) + 
  geom_ribbon(aes(ymin = ey.low,
                  ymax = ey.up), 
              alpha = 0.25,
              fill = "blue") + 
  geom_point(aes(x = x, 
                 y = y),
             color = "red",
             size = 2) +
  facet_wrap(~n,
             scales = "free_y",
             #conditional-labeling facets
             labeller = labeller(n = c(Y1 = "Food CPI <span style = 'color:blue;'> prediction intervals</span><br>and their<span style = 'color:red;'> actual values </span> in G7 countries", 
                                       Y2 = "Energy CPI <span style = 'color:blue;'> prediction intervals</span><br>and their<span style = 'color:red;'> actual values </span> in G7 countries"
                                       ))) + 
  scale_x_discrete(breaks = seq(1990, 2022, 10)) +
  scale_y_continuous(labels = scales::label_number(suffix = "%")) +
  xlab("") + 
  ylab("") +
  theme_bw(base_family = "lato",
           base_size = 20) +
  theme(strip.background = element_rect(fill = "#ccc410"),
        strip.text = ggtext::element_markdown(face = "bold"))

The food graph shows that the model is well-fitting the data. But the energy graph has a much more moderate performance. The reason might be that there are many negative values, and the distribution is very volatile. The below code chunk indicates the exact accuracy results.

#Accuracy

# for food cpi
EY_food <- 
  df_jags %>% 
  drop_na() %>% #removes the unseen data
  filter(n=="Y1") %>% 
  select(ey, y) 

cor(EY_food$y, EY_food$ey)
#[1] 0.9990204

# for energy cpi
EY_energy <- 
  df_jags %>% 
  drop_na() %>% #removes the unseen data
  filter(n=="Y2") %>% 
  select(ey, y) 

cor(EY_energy$y, EY_energy$ey)
#[1] 0.6223109

Now, we can draw the prediction intervals for the next five years on a faceted plot for each dataset.

#5-year projection of Food CPI and Energy CPI, in the G7

#Forecasted data
df_fc <- 
  df_jags %>%
  group_by(n) %>%
  slice_max(time, n=5) %>% 
  ungroup()
  

df_fc %>%
  ggplot(aes(x = ey.low, 
             xend = ey.up, 
             y = time, 
             group = n)) +
  geom_dumbbell(
    colour="#a3c4dc",
    colour_xend="#0e668b",
    size= 2) +
  geom_text(color = "black", 
            size = 4, 
            hjust = 1.3,
            aes(x = ey.low, 
                label = round(ey.low, 2)))+
  geom_text(aes(x = ey.up, 
                label = round(ey.up, 2)), 
            color = "black", 
            size = 4, 
            hjust = -0.2) +
  facet_wrap(~n,
             scales = "free", ncol = 1,
             #conditional-labeling facets
             labeller = labeller(n = c(Y1 = "Food CPI <span style = 'color:#0e668b;'> forecasting intervals</span> in G7 countries", 
                                       Y2 = "Energy CPI <span style = 'color:#0e668b;'> forecasting intervals</span> in G7 countries"
             ))) +
  scale_x_continuous(labels = scales::percent_format(scale = 1)) +
  theme_bw(base_family = "lato",
           base_size = 20) +
  theme(strip.background = element_rect(fill = "#ccc410"),
        strip.text = ggtext::element_markdown(face = "bold"),
        axis.title = element_blank(),
        panel.grid = element_blank())

When we look at the graphs above, it is seen that the food CPI rates might not fall below 6% in the next five years. As for the energy CPI rates, negative values might not be seen for a while.

To leave a comment for the author, please follow the link and comment on their blog: DataGeeek.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)