Mapping multiple trends with confidence

February 6, 2019
By

(This article was first published on r.iresmi.net, and kindly contributed to R-bloggers)

A tutorial to compute trends by groups and plot/map the results

We will use dplyr::nest to create a list-column and will apply a model (with purrr::map) to each row, then we will extract each slope and its p-value with map and broom::tidy.

Setup

library(tidyverse)
library(httr)
library(sf)
library(readxl)
library(janitor)
library(fs)
library(broom)
library(scales)
library(rnaturalearth)
library(rnaturalearthdata)
library(rgeos)

fk <- function(x) format(x, big.mark = " ")

Data

Map data. Départements polygons from OSM.

if(!file_exists("departements-20140528-100m.shp")) {
  f <- tempfile()
  GET("http://osm13.openstreetmap.fr/~cquest/openfla/export/departements-20140528-100m-shp.zip",
      write_disk(f))
  unzip(f)
}

dep <- st_read("departements-20140528-100m.shp")

Population data by département 1990-2008 from INSEE.

if(!file_exists("pop.xls")) {
  GET("https://www.insee.fr/fr/statistiques/fichier/2012713/TCRD_004.xls", 
      write_disk("pop.xls"))
}
  
pop <- read_xls("pop.xls", skip = 3) %>% 
  clean_names() %>% 
  head(-1) %>% 
  rename(insee_dep = x1,
         dep = x2,
         x2018 = x2018_p) %>% 
  select(-4) %>% 
  gather(annee, pop, 3:7) %>% 
  mutate(annee = as.integer(str_replace(annee, "x", "")))

Population trends for each département

pop_model <- function(df) {
  lm(pop ~ annee, data = df)
}

trends <- pop %>% 
  group_by(insee_dep, dep) %>% 
  nest() %>% 
  mutate(model = map(data, pop_model),
         glance = map(model, glance),
         coeff = map(model, tidy, conf.int = TRUE)) 

Plot

trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",
         !insee_dep %in% c("F", "M")) %>% 
  ggplot(aes(fct_reorder(insee_dep, estimate), estimate,
             color = if_else(p.value <= .05,
                             if_else(estimate >= 0, "positive", "négative"),
                             "n.s."))) +
    geom_point() +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = .5) +
    scale_color_manual(name = "Tendance",
                       values = c("positive" = "red",  "n.s." = "lightgray", "négative" = "blue")) +
    scale_y_continuous(labels = fk) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "1990-2018",
         x = "dép.",
         y = bquote(Delta[population] ~ (habitant %.% an^{-1})),
         caption = "r.iresmi.net\ndonnées INSEE") +
    guides(color = guide_legend(reverse = TRUE)) +
    theme(plot.caption = element_text(size = 7))
Only 9 départements have a decreasing population

Map

pop_dep <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee") %>% 
  right_join(dep, by = c("insee_dep" = "code_insee")) %>%
  left_join(filter(pop, annee == 2018, !insee_dep %in% c("F", "M")), by = "insee_dep") %>% 
  st_as_sf() %>% 
  st_transform(2154) 

moy_fr <- trends %>% 
  unnest(coeff) %>% 
  filter(term == "annee",  !insee_dep %in% c("F", "M")) %>% 
  summarise(mean(estimate, na.rm = TRUE)) %>% 
  pull()

world <- ne_countries(scale = "medium", returnclass = "sf") %>% 
  filter(continent == "Europe") %>% 
  st_transform(2154) 

pop_dep %>% 
  ggplot() +
    geom_sf(data = world, fill = "grey97", color = 0) +
    geom_sf(color = "lightgrey", fill = "floralwhite", size = .2) +
    stat_sf_coordinates(data = filter(pop_dep, p.value > .05),
                        aes(size = pop),
                        fill = "lightgrey", color = "lightgrey", shape = 21, alpha = 0.8) +
    stat_sf_coordinates(data = filter(pop_dep, p.value <= .05),
                        aes(size = pop, fill = estimate),
                        color = "lightgrey", shape = 21, alpha = 0.8) +
    coord_sf(xlim = c(100000, 1200000), ylim = c(6000000, 7200000)) +
    scale_fill_gradient2(name = bquote(atop(displaystyle(atop(Delta ~ population[1990-2018],
                                                               (habitant %.% an^{-1}))), 
                                             moy. == .(round(moy_fr)) ~ ", en gris : n.s.")),
                          labels = fk,
                          low = "darkblue", mid = "white", high = "darkred", midpoint = moy_fr) +
    scale_size_area(name = "habitants (2018)", labels = fk, max_size = 10) +
    labs(title = "Évolution des populations départementales françaises",
         subtitle = "Métropole, 1990-2018",
         x = "",
         y = "",
         caption = "r.iresmi.net\ndonnées INSEE 2018\nfond cartographique : contributeurs Openstreetmap 2014\nNaturalearth") +
    theme_bw() +
    theme(plot.caption = element_text(size = 7),
          legend.text.align = 1)
Population is growing stronger in Paris suburbs and in peripheral southern départements

To leave a comment for the author, please follow the link and comment on their blog: r.iresmi.net.

R-bloggers.com offers daily e-mail updates about R news and tutorials on topics such as: Data science, Big Data, R jobs, visualization (ggplot2, Boxplots, maps, animation), programming (RStudio, Sweave, LaTeX, SQL, Eclipse, git, hadoop, Web Scraping) statistics (regression, PCA, time series, trading) and more...



If you got this far, why not subscribe for updates from the site? Choose your flavor: e-mail, twitter, RSS, or facebook...

Comments are closed.

Search R-bloggers

Sponsors

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)