Mapping multiple trends with confidence

[This article was first published on r.iresmi.net, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

A tutorial to compute trends by groups and plot/map the results

We will use dplyr::nest to create a list-column and will apply a model (with purrr::map) to each row, then we will extract each slope and its p-value with map and broom::tidy.

Setup

library(tidyverse)
library(httr)
library(sf)
library(readxl)
library(janitor)
library(fs)
library(broom)
library(scales)
library(rnaturalearth)
library(rnaturalearthdata)
library(rgeos)

fk <- function(x) format(x, big.mark = " ")

Data

Map data. Départements polygons from OSM.

if(!file_exists("departements-20140528-100m.shp")) {
  f <- tempfile()
  GET("http://osm13.openstreetmap.fr/~cquest/openfla/export/departements-20140528-100m-shp.zip",
      write_disk(f))
  unzip(f)
}

dep <- st_read("departements-20140528-100m.shp")

Population data by département 1990-2008 from INSEE.

if(!file_exists("pop.xls")) {
  GET("https://www.insee.fr/fr/statistiques/fichier/2012713/TCRD_004.xls", 
      write_disk("pop.xls"))
}
  
pop <- read_xls("pop.xls", skip = 3) %>% 
  clean_names() %>% 
  head(-1) %>% 
  rename(insee_dep = x1,
         dep = x2,
         x2018 = x2018_p) %>% 
  select(-4) %>% 
  gather(annee, pop, 3:7) %>% 
  mutate(annee = as.integer(str_replace(annee, "x", "")))

Population trends for each département

pop_model <- function(df) {
  lm(pop ~ annee, data = df)
}

trends <- pop %>% 
  group_by(insee_dep, dep) %>% 
  nest() %>% 
  mutate(model = map(data, pop_model),
         glance = map(model, glance),
         coeff = map(model, tidy, conf.int = TRUE)) 

Plot

trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",
         !insee_dep %in% c("F", "M")) %>% 
  ggplot(aes(fct_reorder(insee_dep, estimate), estimate,
             color = if_else(p.value <= .05,
                             if_else(estimate >= 0, "positive", "négative"),
                             "n.s."))) +
    geom_point() +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .5) +
    scale_color_manual(name = "Tendance",
                       values = c("positive" = "red",  "n.s." = "lightgray", "négative" = "blue")) +
    scale_y_continuous(labels = fk) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "1990-2018",
         x = "dép.",
         y = bquote(Delta[population] ~ (habitant %.% an^{-1})),
         caption = "r.iresmi.net\ndonnées INSEE") +
    guides(color = guide_legend(reverse = TRUE)) +
    theme(plot.caption = element_text(size = 7))
Only 9 départements have a decreasing population

Map

pop_dep <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee") %>% 
  right_join(dep, by = c("insee_dep" = "code_insee")) %>%
  left_join(filter(pop, annee == 2018, !insee_dep %in% c("F", "M")), by = "insee_dep") %>% 
  st_as_sf() %>% 
  st_transform(2154) 

moy_fr <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",  !insee_dep %in% c("F", "M")) %>% 
  summarise(mean(estimate, na.rm = TRUE)) %>% 
  pull()

world <- ne_countries(scale = "medium", returnclass = "sf") %>% 
  filter(continent == "Europe") %>% 
  st_transform(2154) 

pop_dep %>% 
  ggplot() +
    geom_sf(data = world, fill = "grey97", color = 0) +
    geom_sf(color = "lightgrey", fill = "floralwhite", size = .2) +
    stat_sf_coordinates(data = filter(pop_dep, p.value > .05),
                        aes(size = pop),
                        fill = "lightgrey", color = "lightgrey", shape = 21, alpha = 0.8) +
    stat_sf_coordinates(data = filter(pop_dep, p.value <= .05),
                        aes(size = pop, fill = estimate),
                        color = "lightgrey", shape = 21, alpha = 0.8) +
    coord_sf(xlim = c(100000, 1200000), ylim = c(6000000, 7200000)) +
    scale_fill_gradient2(name = bquote(atop(displaystyle(atop(Delta ~ population[1990-2018],
                                                               (habitant %.% an^{-1}))), 
                                             moy. == .(round(moy_fr)) ~ ", en gris : n.s.")),
                          labels = fk,
                          low = "darkblue", mid = "white", high = "darkred", midpoint = moy_fr) +
    scale_size_area(name = "habitants (2018)", labels = fk, max_size = 10) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "Métropole, 1990-2018",
         x = "",
         y = "",
         caption = "r.iresmi.net\ndonnées INSEE 2018\nfond cartographique : contributeurs Openstreetmap 2014\nNaturalearth") +
    theme_bw() +
    theme(plot.caption = element_text(size = 7),
          legend.text.align = 1)
Population is growing stronger in Paris suburbs and in peripheral southern départements

To leave a comment for the author, please follow the link and comment on their blog: r.iresmi.net.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)