A New Animation of the Spreading of SARS-CoV-2 in Germany

[This article was first published on rstats-tips.net, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Update 2020-11-28: I’ve updated the resulting image with data until Nov 27, 2020.

In August I published first an animation of the spreading of SARS-CoV-2 in Germany.

Some days ago I found a similar visualization for Austria by Matthias Schnetzer. But he uses a different scale. His worst colour is for 150 incidents per 100,000 residents during the last 7 days. (I used 50 as last value of the scale.)

He also provides a link to his code He uses a little different way for plotting. So I thought I should update my animation using his way.

Getting the data

I’m using data from RKI. I’m processing it the same way I do within my shiny app.

Meta data for each Landkreis

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
library(tidyverse)
library(rlang)
library(lubridate)
library(geojsonio)
library(zoo)
library(gganimate)
library(ggmap)

# Maximum values for plotting
max_infections <- 50
max_delta_infections <- 15
cache_filename <- "raw-data.Rda"

get_meta_data_landkreise <- function(force = FALSE) {
  landkreise_meta_filename <- "landkreise_meta.Rda"
  if(!file.exists(landkreise_meta_filename) | force) {
    url_landkreise_full <- "https://opendata.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0.csv"
    data_landkreise_detail <- read_csv(url_landkreise_full)
    
    landkreise <- data_landkreise_detail %>% 
      select(Landkreis = county, IdLandkreis = RS, Bevoelkerung = EWZ, Bundesland = BL) %>%
      unique()
    save(landkreise, file = landkreise_meta_filename)
  } else {
    load(landkreise_meta_filename)
  }
  return(landkreise)
}

Geo data of all Landkreise

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
get_geo_data <- function() {
  
  
  # https://public.opendatasoft.com/explore/dataset/landkreise-in-germany/download/?format=geojson&timezone=Europe/Berlin&lang=en
  landkreise_geo <- geojson_read("landkreise-in-germany.geojson",
                                 what = "sp"
  )
  
  landkreise_geo@data <- landkreise_geo@data %>%
    mutate(
      cca_2 = if_else(cca_2 == "03152", "03159", cca_2), # fix for Göttingen
      cca_2 = if_else(cca_2 == "03156", "03159", cca_2) # fix for Göttingen
    ) %>%
    select(cca_2)
  
  # Landkreisteile von Göttingen vereinen
  landkreise_geo <- raster::aggregate(landkreise_geo, by = 'cca_2') 
  
  # Berlin: https://github.com/funkeinteraktiv/Berlin-Geodaten
  berlin_geo <- geojson_read("berlin_bezirke.geojson", what = "sp")
  berlin_geo@data <- berlin_geo@data %>% 
    mutate(cca_2 = as.character(11000 + cartodb_id)) %>% 
    select(cca_2)
  
  
  # Combine landkreise and bezirke of Berlin
  landkreise_geo <- rbind(landkreise_geo, berlin_geo)
  
  landkreise_geo <- landkreise_geo %>% sf::st_as_sf()
  
  return(landkreise_geo)
}

Here is a little difference to my shiny app: I convert the geodata into Multipolygons with the function sf::st_as_sf(). So the resulting data_frame after merging the geodata and the corona data has a lot less rows.

Loading all data

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
get_initial_landkreis_data <- function(force_refresh = FALSE) {
  
  if (file.exists(cache_filename) & force_refresh == FALSE) {
    load(cache_filename)
    # refetch <- (data_landkreise_detail_converted %>% pull(MeldedatumKlar) %>% max()) < Sys.Date() - days(1)
    force_refresh <- FALSE
  } else{
    force_refresh <- TRUE
  }
  
  # Refresh only every hour
  if (force_refresh && file.exists(cache_filename)) {
    if(difftime(Sys.time() , file.info(cache_filename)$ctime , units = "mins") < 60) {
      force_refresh <- FALSE
      load(cache_filename)
    }
  }
  
  if(force_refresh) {
    Sys.setlocale(category = "LC_ALL", locale = "de_DE.UTF-8")
    # url_rki <- "https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv"
    url_rki <- "https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data"
    data_landkreise_detail_converted <- read_csv(url_rki) %>%
      mutate(
        Meldedatum = as.Date(
          strptime(Meldedatum, format = "%Y/%m/%d %H:%M:%S")
        ),
        Datenstand = as.Date(
          strptime(Datenstand, format = "%d.%m.%Y, %H:%M Uhr")
        )
      )    
    
    save(data_landkreise_detail_converted, file = cache_filename)
  }
  
  data_landkreise_per_day <- data_landkreise_detail_converted %>%
    filter(NeuerFall %in% c(1, 0)) %>%
    complete(Meldedatum = seq(min(Meldedatum), max(Meldedatum), by = "day"), 
             IdLandkreis, fill = list(AnzahlFall = 0, NeuerFall = 1)) %>%
    group_by(IdLandkreis, Meldedatum) %>%
    summarize(
      infected = sum(AnzahlFall)
    ) %>% 
    ungroup() %>%
    complete(Meldedatum, IdLandkreis, fill = list(infected = 0)) %>%
    arrange(Meldedatum) %>%
    group_by(IdLandkreis) %>%
    mutate(
      infected_7 = rollsum(infected, 7, fill = NA, align = "right"),
      infected_7_before = rollsum(lag(infected, n = 7), 7, fill = NA, align = "right"),
      delta_7 = infected_7 - infected_7_before,
      gesamt = cumsum(infected)
    ) %>% 
    left_join(landkreise) %>% 
    mutate(
      infected_7_per_100k = round(infected_7 / Bevoelkerung * 100000, 1),
      delta_7_per_100k = round(delta_7 / Bevoelkerung * 100000, 1)
    ) %>%
    mutate(
      infected_7_per_100k_fill = if_else(infected_7_per_100k < 0, 0, infected_7_per_100k),
      infected_7_per_100k_fill = if_else(infected_7_per_100k > max_infections, max_infections + 1, infected_7_per_100k_fill),
      delta_7_per_100k_fill = if_else(delta_7_per_100k > max_delta_infections, max_delta_infections + 1, delta_7_per_100k),
      delta_7_per_100k_fill = if_else(delta_7_per_100k < -max_delta_infections, -max_delta_infections - 1, delta_7_per_100k_fill),
    )
  return(data_landkreise_per_day)
}

landkreise <- get_meta_data_landkreise()
geo <- get_geo_data()
corona <- get_initial_landkreis_data(force_refresh = TRUE) %>% 
  ungroup() %>% 
  mutate(infected_7_per_100k = replace_na(infected_7_per_100k, 0))

Merging the geo data and corona data

1
2
3
mapdata <- geo %>% left_join(corona, by = c("cca_2" = "IdLandkreis")) %>% 
  select(cca_2, Landkreis, Meldedatum, infected_7_per_100k, geometry) %>% 
  filter(Meldedatum  >= ymd("2020-02-01"))

Plotting and animation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
anim <- mapdata %>%
  ggplot() +
  geom_sf(aes(fill = infected_7_per_100k), size = 0.3) +
  scale_fill_gradientn(colours=c("green","yellow","orange","red","dark red"), 
                       limits=c(0,150), na.value = "dark red",
                       labels = c("0", "50", "100", ">150")) +
  guides(fill=guide_colourbar(title = NULL,
                              barwidth = 15,
                              barheight = .5)) +
  theme_bw() + 
  theme(legend.position = "bottom",
        axis.text = element_blank()) +
  labs(title = "Entwicklung der Covid-Fälle seit Februar 2020", 
       subtitle = "7-Tage-Inzidenz pro 100.000 Einwohner - Datum: {frame_time}",
       caption = "Grafik: @rstats_tips") +
  transition_time(Meldedatum)


gif <- gganimate::animate(anim, nframes = length(unique(mapdata$Meldedatum)) + 30, 
                          fps = 10, end_pause = 30, 
                          res = 150, unit = "in", width = 6, height = 5)
gif

anim_save("covid-D-2.gif", animation = gif)

Result

Waiting for approx. 90 minutes I get the result:

To leave a comment for the author, please follow the link and comment on their blog: rstats-tips.net.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)