A New Animation of the Spreading of SARS-CoV-2 in Germany
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.
Update 2020-11-28: I’ve updated the resulting image with data until Nov 27, 2020.
In August I published first an animation of the spreading of SARS-CoV-2 in Germany.
Some days ago I found a similar visualization for Austria by Matthias Schnetzer. But he uses a different scale. His worst colour is for 150 incidents per 100,000 residents during the last 7 days. (I used 50 as last value of the scale.)
He also provides a link to his code He uses a little different way for plotting. So I thought I should update my animation using his way.
Getting the data
I’m using data from RKI. I’m processing it the same way I do within my shiny app.
Meta data for each Landkreis
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
library(tidyverse)
library(rlang)
library(lubridate)
library(geojsonio)
library(zoo)
library(gganimate)
library(ggmap)
# Maximum values for plotting
max_infections <- 50
max_delta_infections <- 15
cache_filename <- "raw-data.Rda"
get_meta_data_landkreise <- function(force = FALSE) {
landkreise_meta_filename <- "landkreise_meta.Rda"
if(!file.exists(landkreise_meta_filename) | force) {
url_landkreise_full <- "https://opendata.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0.csv"
data_landkreise_detail <- read_csv(url_landkreise_full)
landkreise <- data_landkreise_detail %>%
select(Landkreis = county, IdLandkreis = RS, Bevoelkerung = EWZ, Bundesland = BL) %>%
unique()
save(landkreise, file = landkreise_meta_filename)
} else {
load(landkreise_meta_filename)
}
return(landkreise)
}
|
Geo data of all Landkreise
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
get_geo_data <- function() {
# https://public.opendatasoft.com/explore/dataset/landkreise-in-germany/download/?format=geojson&timezone=Europe/Berlin&lang=en
landkreise_geo <- geojson_read("landkreise-in-germany.geojson",
what = "sp"
)
landkreise_geo@data <- landkreise_geo@data %>%
mutate(
cca_2 = if_else(cca_2 == "03152", "03159", cca_2), # fix for Göttingen
cca_2 = if_else(cca_2 == "03156", "03159", cca_2) # fix for Göttingen
) %>%
select(cca_2)
# Landkreisteile von Göttingen vereinen
landkreise_geo <- raster::aggregate(landkreise_geo, by = 'cca_2')
# Berlin: https://github.com/funkeinteraktiv/Berlin-Geodaten
berlin_geo <- geojson_read("berlin_bezirke.geojson", what = "sp")
berlin_geo@data <- berlin_geo@data %>%
mutate(cca_2 = as.character(11000 + cartodb_id)) %>%
select(cca_2)
# Combine landkreise and bezirke of Berlin
landkreise_geo <- rbind(landkreise_geo, berlin_geo)
landkreise_geo <- landkreise_geo %>% sf::st_as_sf()
return(landkreise_geo)
}
|
Here is a little difference to my shiny app: I convert the geodata into Multipolygons
with the function sf::st_as_sf(). So the resulting data_frame after merging
the geodata and the corona data has a lot less rows.
Loading all data
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
get_initial_landkreis_data <- function(force_refresh = FALSE) {
if (file.exists(cache_filename) & force_refresh == FALSE) {
load(cache_filename)
# refetch <- (data_landkreise_detail_converted %>% pull(MeldedatumKlar) %>% max()) < Sys.Date() - days(1)
force_refresh <- FALSE
} else{
force_refresh <- TRUE
}
# Refresh only every hour
if (force_refresh && file.exists(cache_filename)) {
if(difftime(Sys.time() , file.info(cache_filename)$ctime , units = "mins") < 60) {
force_refresh <- FALSE
load(cache_filename)
}
}
if(force_refresh) {
Sys.setlocale(category = "LC_ALL", locale = "de_DE.UTF-8")
# url_rki <- "https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.csv"
url_rki <- "https://www.arcgis.com/sharing/rest/content/items/f10774f1c63e40168479a1feb6c7ca74/data"
data_landkreise_detail_converted <- read_csv(url_rki) %>%
mutate(
Meldedatum = as.Date(
strptime(Meldedatum, format = "%Y/%m/%d %H:%M:%S")
),
Datenstand = as.Date(
strptime(Datenstand, format = "%d.%m.%Y, %H:%M Uhr")
)
)
save(data_landkreise_detail_converted, file = cache_filename)
}
data_landkreise_per_day <- data_landkreise_detail_converted %>%
filter(NeuerFall %in% c(1, 0)) %>%
complete(Meldedatum = seq(min(Meldedatum), max(Meldedatum), by = "day"),
IdLandkreis, fill = list(AnzahlFall = 0, NeuerFall = 1)) %>%
group_by(IdLandkreis, Meldedatum) %>%
summarize(
infected = sum(AnzahlFall)
) %>%
ungroup() %>%
complete(Meldedatum, IdLandkreis, fill = list(infected = 0)) %>%
arrange(Meldedatum) %>%
group_by(IdLandkreis) %>%
mutate(
infected_7 = rollsum(infected, 7, fill = NA, align = "right"),
infected_7_before = rollsum(lag(infected, n = 7), 7, fill = NA, align = "right"),
delta_7 = infected_7 - infected_7_before,
gesamt = cumsum(infected)
) %>%
left_join(landkreise) %>%
mutate(
infected_7_per_100k = round(infected_7 / Bevoelkerung * 100000, 1),
delta_7_per_100k = round(delta_7 / Bevoelkerung * 100000, 1)
) %>%
mutate(
infected_7_per_100k_fill = if_else(infected_7_per_100k < 0, 0, infected_7_per_100k),
infected_7_per_100k_fill = if_else(infected_7_per_100k > max_infections, max_infections + 1, infected_7_per_100k_fill),
delta_7_per_100k_fill = if_else(delta_7_per_100k > max_delta_infections, max_delta_infections + 1, delta_7_per_100k),
delta_7_per_100k_fill = if_else(delta_7_per_100k < -max_delta_infections, -max_delta_infections - 1, delta_7_per_100k_fill),
)
return(data_landkreise_per_day)
}
landkreise <- get_meta_data_landkreise()
geo <- get_geo_data()
corona <- get_initial_landkreis_data(force_refresh = TRUE) %>%
ungroup() %>%
mutate(infected_7_per_100k = replace_na(infected_7_per_100k, 0))
|
Merging the geo data and corona data
1 2 3 |
mapdata <- geo %>% left_join(corona, by = c("cca_2" = "IdLandkreis")) %>%
select(cca_2, Landkreis, Meldedatum, infected_7_per_100k, geometry) %>%
filter(Meldedatum >= ymd("2020-02-01"))
|
Plotting and animation
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
anim <- mapdata %>%
ggplot() +
geom_sf(aes(fill = infected_7_per_100k), size = 0.3) +
scale_fill_gradientn(colours=c("green","yellow","orange","red","dark red"),
limits=c(0,150), na.value = "dark red",
labels = c("0", "50", "100", ">150")) +
guides(fill=guide_colourbar(title = NULL,
barwidth = 15,
barheight = .5)) +
theme_bw() +
theme(legend.position = "bottom",
axis.text = element_blank()) +
labs(title = "Entwicklung der Covid-Fälle seit Februar 2020",
subtitle = "7-Tage-Inzidenz pro 100.000 Einwohner - Datum: {frame_time}",
caption = "Grafik: @rstats_tips") +
transition_time(Meldedatum)
gif <- gganimate::animate(anim, nframes = length(unique(mapdata$Meldedatum)) + 30,
fps = 10, end_pause = 30,
res = 150, unit = "in", width = 6, height = 5)
gif
anim_save("covid-D-2.gif", animation = gif)
|
Result
Waiting for approx. 90 minutes I get the result:

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.