% my_skim() %>% select(-skim_type) %>% gt() %>% cols_label(n_missing = "# Missing", complete_rate = "Completeness", numeric.mean = "Mean", numeric.sd = "Standard Deviation", numeric.p0 = "Min", numeric.p100 = "Max", numeric.hist = "Histogram") %>% opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>% tab_header(title = "Northern Hemisphere Temperatures by Year") Northern Hemisphere Temperatures by Year skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram Year 0 1.0000000 1.951500e+03 41.7133072 1880.00 2023.00 β–‡β–‡β–‡β–‡β–‡ Jan 0 1.0000000 8.576389e-02 0.5778724 -1.52 1.59 ▁▅▇▅▂ Feb 0 1.0000000 1.009722e-01 0.5789589 -0.98 1.94 ▅▇▆▂▁ Mar 0 1.0000000 1.282639e-01 0.5701347 -0.80 1.91 ▅▇▃▂▁ Apr 0 1.0000000 9.756944e-02 0.4845865 -0.65 1.48 β–†β–‡β–ƒβ–‚β–‚ May 0 1.0000000 9.055556e-02 0.4203525 -0.73 1.28 β–‚β–‡β–…β–‚β–‚ Jun 1 0.9930556 7.951049e-02 0.4080447 -0.52 1.21 ▅▇▂▂▁ Jul 1 0.9930556 7.811189e-02 0.3845263 -0.59 1.10 ▃▇▃▂▁ Aug 1 0.9930556 6.384615e-02 0.4130920 -0.77 1.17 β–‚β–‡β–†β–‚β–‚ Sep 1 0.9930556 8.013986e-02 0.4194748 -0.80 1.22 ▁▇▅▂▂ Oct 1 0.9930556 1.309790e-01 0.4552020 -0.84 1.32 β–‚β–‡β–†β–‚β–‚ Nov 1 0.9930556 1.152448e-01 0.4939320 -0.83 1.61 ▃▇▅▂▁ Dec 1 0.9930556 5.692308e-02 0.5146409 -1.14 1.53 ▁▇▇▂▂ J-D 1 0.9930556 8.888112e-02 0.4444241 -0.57 1.35 ▆▇▂▂▁ D-N 2 0.9861111 9.042254e-02 0.4448506 -0.58 1.37 ▆▇▂▂▁ DJF 1 0.9930556 8.377622e-02 0.5291899 -1.05 1.67 β–‚β–‡β–†β–‚β–‚ MAM 0 1.0000000 1.055556e-01 0.4791598 -0.71 1.50 ▅▇▃▂▁ JJA 1 0.9930556 7.370629e-02 0.3969650 -0.54 1.12 β–…β–‡β–‚β–‚β–‚ SON 1 0.9930556 1.086713e-01 0.4451210 -0.72 1.34 ▃▇▅▂▁ And southern hemisphere data. sh_temps %>% my_skim() %>% select(-skim_type) %>% gt() %>% cols_label(n_missing = "# Missing", complete_rate = "Completeness", numeric.mean = "Mean", numeric.sd = "Standard Deviation", numeric.p0 = "Min", numeric.p100 = "Max", numeric.hist = "Histogram") %>% opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>% tab_header(title = "Southern Hemisphere Temperatures by Year") Southern Hemisphere Temperatures by Year skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram Year 0 1.0000000 1951.50000000 41.7133072 1880.00 2023.00 β–‡β–‡β–‡β–‡β–‡ Jan 0 1.0000000 0.03875000 0.3165414 -0.63 0.80 ▁▇▅▃▂ Feb 0 1.0000000 0.03840278 0.3154565 -0.59 0.79 β–‚β–‡β–…β–ƒβ–‚ Mar 0 1.0000000 0.04750000 0.3274280 -0.59 0.81 β–‚β–‡β–ƒβ–ƒβ–‚ Apr 0 1.0000000 0.02930556 0.3421562 -0.63 0.98 ▃▇▅▃▁ May 0 1.0000000 0.01631944 0.3613025 -0.60 0.91 β–…β–‡β–…β–ƒβ–‚ Jun 1 0.9930556 -0.01083916 0.3535683 -0.67 0.81 β–ƒβ–‡β–…β–ƒβ–‚ Jul 1 0.9930556 0.03573427 0.3354343 -0.47 0.86 β–‡β–‡β–ƒβ–ƒβ–‚ Aug 1 0.9930556 0.04594406 0.3448540 -0.46 0.92 β–‡β–†β–ƒβ–ƒβ–‚ Sep 1 0.9930556 0.03748252 0.3357448 -0.50 0.91 β–‡β–‡β–ƒβ–ƒβ–‚ Oct 1 0.9930556 0.03937063 0.3242656 -0.51 0.91 ▆▇▆▃▁ Nov 1 0.9930556 0.04160839 0.3082542 -0.53 0.80 β–‚β–‡β–ƒβ–ƒβ–‚ Dec 1 0.9930556 0.04713287 0.3133837 -0.55 0.80 β–ƒβ–‡β–ƒβ–ƒβ–‚ J-D 1 0.9930556 0.03244755 0.3138072 -0.48 0.75 β–…β–‡β–‚β–ƒβ–‚ D-N 2 0.9861111 0.03218310 0.3153409 -0.50 0.75 β–…β–‡β–ƒβ–ƒβ–‚ DJF 1 0.9930556 0.04153846 0.3125887 -0.58 0.80 β–‚β–‡β–ƒβ–ƒβ–‚ MAM 0 1.0000000 0.03069444 0.3380438 -0.60 0.85 β–ƒβ–‡β–…β–…β–‚ JJA 1 0.9930556 0.02440559 0.3347516 -0.53 0.77 β–†β–‡β–ƒβ–…β–‚ SON 1 0.9930556 0.03965035 0.3150909 -0.47 0.74 β–…β–‡β–ƒβ–ƒβ–‚ And data broken down into finer zones from north to south. zonann_temps %>% my_skim() %>% select(-skim_type) %>% gt() %>% cols_label(n_missing = "# Missing", complete_rate = "Completeness", numeric.mean = "Mean", numeric.sd = "Standard Deviation", numeric.p0 = "Min", numeric.p100 = "Max", numeric.hist = "Histogram") %>% opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>% tab_header(title = "Yearly Temperatures by Zone") Yearly Temperatures by Zone skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram Year 0 1 1.951000e+03 41.4246304 1880.00 2022.00 β–‡β–‡β–‡β–‡β–‡ Glob 0 1 6.020979e-02 0.3698448 -0.48 1.02 β–†β–‡β–ƒβ–‚β–‚ NHem 0 1 8.888112e-02 0.4444241 -0.57 1.35 ▆▇▂▂▁ SHem 0 1 3.244755e-02 0.3138072 -0.48 0.75 β–…β–‡β–‚β–ƒβ–‚ 24N-90N 0 1 1.090210e-01 0.5357058 -0.67 1.67 ▆▇▂▂▁ 24S-24N 0 1 7.041958e-02 0.3459949 -0.63 1.01 ▂▇▅▃▁ 90S-24S 0 1 -2.517483e-03 0.3105594 -0.48 0.71 β–†β–‡β–ƒβ–…β–‚ 64N-90N 0 1 2.502098e-01 1.0028287 -1.77 3.24 ▂▇▅▂▁ 44N-64N 0 1 1.305594e-01 0.5884089 -0.79 1.82 ▆▇▃▃▁ 24N-44N 0 1 3.643357e-02 0.4192891 -0.63 1.27 ▃▇▂▂▁ EQU-24N 0 1 6.237762e-02 0.3467758 -0.67 0.97 β–‚β–‡β–†β–ƒβ–‚ 24S-EQU 0 1 7.818182e-02 0.3520816 -0.59 1.07 ▃▇▅▃▁ 44S-24S 0 1 4.314685e-02 0.3300329 -0.44 0.80 β–‡β–‡β–…β–ƒβ–ƒ 64S-44S 0 1 -5.657343e-02 0.2739233 -0.54 0.44 β–†β–‡β–†β–†β–† 90S-64S 0 1 -8.265734e-02 0.7609316 -2.60 1.22 ▁▂▃▇▅ The data sets are all over 0.99% complete. Handling missing values by dropping them is reasonable rather than imputing them. Tidying the Data I’ll note that the data isn’t tidy. In the tidy paradigm, each row corresponds to an observation. Here, multiple observations are included in each row and information is encoded in the column names. This dataset is in a wide format, rather than a long format, presumably to make it easy to compute the yearly and seasonal quarterly data. The tidyr package’s functions pivot_longer (and the converse, pivot_wider) are useful ways to reshape data. Reshaping Data with pivot_longer My plan is to look at the global temperature by month and then animate across years. This is is original wide data. head(global_temps) # A tibble: 6 Γ— 19 Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec 1 1880 -0.19 -0.25 -0.09 -0.17 -0.1 -0.21 -0.18 -0.11 -0.15 -0.24 -0.22 -0.18 2 1881 -0.2 -0.15 0.03 0.05 0.05 -0.19 0 -0.04 -0.16 -0.22 -0.19 -0.08 3 1882 0.16 0.13 0.04 -0.16 -0.14 -0.22 -0.17 -0.08 -0.15 -0.24 -0.17 -0.36 4 1883 -0.3 -0.37 -0.13 -0.19 -0.18 -0.08 -0.08 -0.14 -0.23 -0.12 -0.24 -0.11 5 1884 -0.13 -0.09 -0.37 -0.4 -0.34 -0.35 -0.31 -0.28 -0.28 -0.25 -0.34 -0.31 6 1885 -0.59 -0.34 -0.27 -0.42 -0.45 -0.44 -0.34 -0.32 -0.29 -0.24 -0.24 -0.11 # β„Ή 6 more variables: `J-D` , `D-N` , DJF , MAM , # JJA , SON I’m dropping the seasonal data and only retaining the monthly data. I then reshape it into three columns: year, month, and change in temperature. I’m dropping the na values; the default is to retain them, so values_drop_na = TRUE should be set explicitly. global_temps_reshaped % select(Year:Dec) %>% pivot_longer(Jan:Dec, names_to = "month", values_to = "Delta_temp", values_drop_na = TRUE) head(global_temps_reshaped) # A tibble: 6 Γ— 3 Year month Delta_temp 1 1880 Jan -0.19 2 1880 Feb -0.25 3 1880 Mar -0.09 4 1880 Apr -0.17 5 1880 May -0.1 6 1880 Jun -0.21 This reshaped data has 1721 rows compared to the 144 original rows- definitely longer! Just double check the completeness… skim(global_temps_reshaped) Data summary Name global_temps_reshaped Number of rows 1721 Number of columns 3 _______________________ Column type frequency: character 1 numeric 2 ________________________ Group variables None Variable type: character skim_variable n_missing complete_rate min max empty n_unique whitespace month 0 1 3 3 0 12 0 Variable type: numeric skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist Year 0 1 1951.21 41.41 1880.00 1915.00 1951.00 1987.00 2023.00 β–‡β–‡β–‡β–‡β–‡ Delta_temp 0 1 0.06 0.39 -0.82 -0.22 -0.03 0.29 1.37 ▁▇▃▂▁ Setting Classes and Factor levels Note that our new month column should be a factor. I need to provide the levels otherwise ggplot will plot the months in alphabetical order. It turns out that base R has a few built in constants and abbreviated months is one of them. (English only, I’m sorry.) This saves some typing! global_temps_reshaped$month % filter(Year == 1890) %>% ggplot(aes(month, Delta_temp, fill = Delta_temp)) + geom_col() + scale_fill_viridis(option = "turbo", name = "\U0394 T (\U00B0 C)") + ylim(-1.5, 1.5) + labs(x = "Month", y = "\U0394 Temperature (\U00B0 C)") + labs(title = "Global Deviations in Temperature") + labs(subtitle = "Year: 1890") + labs(caption = "Data from: NASA GISTEMP v4 via #TidyTuesday") + theme_pander(12) Final Version I went back and forth between the pander and the classic theme for the graph. Both have a white background, but pander has a grid and classic doesn’t. I decided on classic in the end. I ended up moving the caption to the left; when it was right aligned it looked like it was supposed to be aligned with the x axis label, but wasn’t. global_temps_reshaped %>% ggplot(aes(month, Delta_temp, fill = Delta_temp)) + geom_col() + scale_fill_viridis(option = "turbo", name = "\U0394 T (\U00B0 C)") + ylim(-1.4, 1.4) + labs(x = "Month", y = "\U0394 Temperature (\U00B0 C)") + labs(caption = "Data from: NASA GISTEMP v4 via #TidyTuesday") + theme_classic(12) + theme(plot.title = element_text(hjust = 0.5)) + theme(plot.caption = element_text(hjust = 0)) + labs(title = "Global Deviations in Temperature", subtitle = "Year: {frame_time}") + transition_time(Year) And saving the figure. anim_save("thumbnail.gif") CitationBibTeX citation:@online{e. sinks2023, author = {E. Sinks, Louise}, title = {TidyTuesday {Week} 28: {Global} {Surface} {Temperature}}, date = {2023-07-11}, url = {https://lsinks.github.io/posts/2023-07-11-tidytuesday-temps/temperatures}, langid = {en} } For attribution, please cite this work as: E. Sinks, Louise. 2023. β€œTidyTuesday Week 28: Global Surface Temperature.” July 11, 2023. https://lsinks.github.io/posts/2023-07-11-tidytuesday-temps/temperatures. " />

TidyTuesday Week 28: Global Surface Temperature

[This article was first published on Louise E. Sinks, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Global Temperature #TidyTuesday

Today’s TidyTuesday is on global surface temperatures. The source of the data is Β NASA GISS Surface Temperature Analysis (GISTEMP v4) and more details about this data set can be found in this published paper: Lenssen, N., G. Schmidt, J. Hansen, M. Menne, A. Persin, R. Ruedy, and D. Zyss, 2019: Improvements in the GISTEMP uncertainty model. J. Geophys. Res. Atmos., 124, no. 12, 6307-6326, doi:10.1029/2018JD029522.

I saw this lovely animated plot on Twitter and decided that I also wanted to make an aminated plot. I use a somewhat different approach though.

Loading the Libraries and Data

library(tidyverse) # who doesn't want to be tidy
library(gt) # nice tables
library(skimr) # inspect missingness and range of data
library(gganimate) # animate ggplots
library(ggthemes) # more themes for ggplots
library(viridis) # extra color scales

Loading the TidyTuesday in the normal way.

tuesdata <- tidytuesdayR::tt_load(2023, week = 28)

global_temps <- tuesdata$global_temps
nh_temps <- tuesdata$nh_temps
sh_temps <- tuesdata$sh_temps
zonann_temps <- tuesdata$zonann_temps

Reformat and Clean the Data

Inspecting the Data

First, I will check the completeness of the data. All the data is numeric, so I’m going to use a custom skim function that omits some of the quartile data. I go over how to create a custom skim function here. The data comes as 4 separate tibbles.

my_skim <- skim_with(numeric = sfl(p25 = NULL, p50 = NULL, p75 = NULL))

global_temps %>% my_skim() %>% select(-skim_type)   %>% gt() %>%
  cols_label(n_missing = "# Missing", complete_rate = "Completeness", 
             numeric.mean = "Mean", numeric.sd = "Standard Deviation",
             numeric.p0 = "Min", numeric.p100 = "Max",
             numeric.hist = "Histogram") %>%
  opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>%
  tab_header(title = "Global Temperatures by Year") 
Global Temperatures by Year
skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram
Year 0 1.0000000 1.951500e+03 41.7133072 1880.00 2023.00 β–‡β–‡β–‡β–‡β–‡
Jan 0 1.0000000 6.333333e-02 0.4235976 -0.81 1.18 β–‚β–‡β–†β–ƒβ–‚
Feb 0 1.0000000 7.090278e-02 0.4285133 -0.63 1.37 ▅▇▃▂▁
Mar 0 1.0000000 8.888889e-02 0.4337898 -0.63 1.36 ▅▇▃▂▁
Apr 0 1.0000000 6.368056e-02 0.3966093 -0.58 1.13 β–†β–‡β–…β–ƒβ–‚
May 0 1.0000000 5.291667e-02 0.3778942 -0.55 1.02 β–†β–‡β–…β–‚β–‚
Jun 1 0.9930556 3.314685e-02 0.3673629 -0.52 0.93 β–†β–‡β–ƒβ–‚β–‚
Jul 1 0.9930556 5.587413e-02 0.3475312 -0.51 0.94 β–…β–‡β–‚β–‚β–‚
Aug 1 0.9930556 5.440559e-02 0.3633037 -0.55 1.02 β–ƒβ–‡β–ƒβ–‚β–‚
Sep 1 0.9930556 5.818182e-02 0.3601988 -0.58 0.99 β–‚β–‡β–ƒβ–‚β–‚
Oct 1 0.9930556 8.419580e-02 0.3692902 -0.58 1.09 ▃▇▃▂▁
Nov 1 0.9930556 7.776224e-02 0.3761975 -0.55 1.11 ▃▇▃▂▁
Dec 1 0.9930556 5.181818e-02 0.3931681 -0.82 1.16 ▂▇▃▃▁
J-D 1 0.9930556 6.020979e-02 0.3698448 -0.48 1.02 β–†β–‡β–ƒβ–‚β–‚
D-N 2 0.9861111 6.077465e-02 0.3707192 -0.49 1.04 β–†β–‡β–ƒβ–‚β–‚
DJF 1 0.9930556 6.356643e-02 0.4049559 -0.67 1.24 ▃▇▃▂▁
MAM 0 1.0000000 6.854167e-02 0.3983760 -0.58 1.14 β–†β–‡β–…β–‚β–‚
JJA 1 0.9930556 4.769231e-02 0.3555351 -0.50 0.94 β–…β–‡β–ƒβ–‚β–‚
SON 1 0.9930556 7.286713e-02 0.3630672 -0.52 1.00 β–…β–‡β–ƒβ–‚β–‚

There is also northern hemisphere data.

nh_temps %>% my_skim() %>% select(-skim_type)   %>% gt() %>%
  cols_label(n_missing = "# Missing", complete_rate = "Completeness", 
             numeric.mean = "Mean", numeric.sd = "Standard Deviation",
             numeric.p0 = "Min", numeric.p100 = "Max",
             numeric.hist = "Histogram") %>%
  opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>%
  tab_header(title = "Northern Hemisphere Temperatures by Year") 
Northern Hemisphere Temperatures by Year
skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram
Year 0 1.0000000 1.951500e+03 41.7133072 1880.00 2023.00 β–‡β–‡β–‡β–‡β–‡
Jan 0 1.0000000 8.576389e-02 0.5778724 -1.52 1.59 ▁▅▇▅▂
Feb 0 1.0000000 1.009722e-01 0.5789589 -0.98 1.94 ▅▇▆▂▁
Mar 0 1.0000000 1.282639e-01 0.5701347 -0.80 1.91 ▅▇▃▂▁
Apr 0 1.0000000 9.756944e-02 0.4845865 -0.65 1.48 β–†β–‡β–ƒβ–‚β–‚
May 0 1.0000000 9.055556e-02 0.4203525 -0.73 1.28 β–‚β–‡β–…β–‚β–‚
Jun 1 0.9930556 7.951049e-02 0.4080447 -0.52 1.21 ▅▇▂▂▁
Jul 1 0.9930556 7.811189e-02 0.3845263 -0.59 1.10 ▃▇▃▂▁
Aug 1 0.9930556 6.384615e-02 0.4130920 -0.77 1.17 β–‚β–‡β–†β–‚β–‚
Sep 1 0.9930556 8.013986e-02 0.4194748 -0.80 1.22 ▁▇▅▂▂
Oct 1 0.9930556 1.309790e-01 0.4552020 -0.84 1.32 β–‚β–‡β–†β–‚β–‚
Nov 1 0.9930556 1.152448e-01 0.4939320 -0.83 1.61 ▃▇▅▂▁
Dec 1 0.9930556 5.692308e-02 0.5146409 -1.14 1.53 ▁▇▇▂▂
J-D 1 0.9930556 8.888112e-02 0.4444241 -0.57 1.35 ▆▇▂▂▁
D-N 2 0.9861111 9.042254e-02 0.4448506 -0.58 1.37 ▆▇▂▂▁
DJF 1 0.9930556 8.377622e-02 0.5291899 -1.05 1.67 β–‚β–‡β–†β–‚β–‚
MAM 0 1.0000000 1.055556e-01 0.4791598 -0.71 1.50 ▅▇▃▂▁
JJA 1 0.9930556 7.370629e-02 0.3969650 -0.54 1.12 β–…β–‡β–‚β–‚β–‚
SON 1 0.9930556 1.086713e-01 0.4451210 -0.72 1.34 ▃▇▅▂▁

And southern hemisphere data.

sh_temps %>% my_skim() %>% select(-skim_type)   %>% gt() %>%
  cols_label(n_missing = "# Missing", complete_rate = "Completeness", 
             numeric.mean = "Mean", numeric.sd = "Standard Deviation",
             numeric.p0 = "Min", numeric.p100 = "Max",
             numeric.hist = "Histogram") %>%
  opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>%
  tab_header(title = "Southern Hemisphere Temperatures by Year") 
Southern Hemisphere Temperatures by Year
skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram
Year 0 1.0000000 1951.50000000 41.7133072 1880.00 2023.00 β–‡β–‡β–‡β–‡β–‡
Jan 0 1.0000000 0.03875000 0.3165414 -0.63 0.80 ▁▇▅▃▂
Feb 0 1.0000000 0.03840278 0.3154565 -0.59 0.79 β–‚β–‡β–…β–ƒβ–‚
Mar 0 1.0000000 0.04750000 0.3274280 -0.59 0.81 β–‚β–‡β–ƒβ–ƒβ–‚
Apr 0 1.0000000 0.02930556 0.3421562 -0.63 0.98 ▃▇▅▃▁
May 0 1.0000000 0.01631944 0.3613025 -0.60 0.91 β–…β–‡β–…β–ƒβ–‚
Jun 1 0.9930556 -0.01083916 0.3535683 -0.67 0.81 β–ƒβ–‡β–…β–ƒβ–‚
Jul 1 0.9930556 0.03573427 0.3354343 -0.47 0.86 β–‡β–‡β–ƒβ–ƒβ–‚
Aug 1 0.9930556 0.04594406 0.3448540 -0.46 0.92 β–‡β–†β–ƒβ–ƒβ–‚
Sep 1 0.9930556 0.03748252 0.3357448 -0.50 0.91 β–‡β–‡β–ƒβ–ƒβ–‚
Oct 1 0.9930556 0.03937063 0.3242656 -0.51 0.91 ▆▇▆▃▁
Nov 1 0.9930556 0.04160839 0.3082542 -0.53 0.80 β–‚β–‡β–ƒβ–ƒβ–‚
Dec 1 0.9930556 0.04713287 0.3133837 -0.55 0.80 β–ƒβ–‡β–ƒβ–ƒβ–‚
J-D 1 0.9930556 0.03244755 0.3138072 -0.48 0.75 β–…β–‡β–‚β–ƒβ–‚
D-N 2 0.9861111 0.03218310 0.3153409 -0.50 0.75 β–…β–‡β–ƒβ–ƒβ–‚
DJF 1 0.9930556 0.04153846 0.3125887 -0.58 0.80 β–‚β–‡β–ƒβ–ƒβ–‚
MAM 0 1.0000000 0.03069444 0.3380438 -0.60 0.85 β–ƒβ–‡β–…β–…β–‚
JJA 1 0.9930556 0.02440559 0.3347516 -0.53 0.77 β–†β–‡β–ƒβ–…β–‚
SON 1 0.9930556 0.03965035 0.3150909 -0.47 0.74 β–…β–‡β–ƒβ–ƒβ–‚

And data broken down into finer zones from north to south.

zonann_temps %>% my_skim() %>% select(-skim_type)   %>% gt() %>%
  cols_label(n_missing = "# Missing", complete_rate = "Completeness", 
             numeric.mean = "Mean", numeric.sd = "Standard Deviation",
             numeric.p0 = "Min", numeric.p100 = "Max",
             numeric.hist = "Histogram") %>%
  opt_stylize(style = 6, color = "blue", add_row_striping = TRUE) %>%
  tab_header(title = "Yearly Temperatures by Zone") 
Yearly Temperatures by Zone
skim_variable # Missing Completeness Mean Standard Deviation Min Max Histogram
Year 0 1 1.951000e+03 41.4246304 1880.00 2022.00 β–‡β–‡β–‡β–‡β–‡
Glob 0 1 6.020979e-02 0.3698448 -0.48 1.02 β–†β–‡β–ƒβ–‚β–‚
NHem 0 1 8.888112e-02 0.4444241 -0.57 1.35 ▆▇▂▂▁
SHem 0 1 3.244755e-02 0.3138072 -0.48 0.75 β–…β–‡β–‚β–ƒβ–‚
24N-90N 0 1 1.090210e-01 0.5357058 -0.67 1.67 ▆▇▂▂▁
24S-24N 0 1 7.041958e-02 0.3459949 -0.63 1.01 ▂▇▅▃▁
90S-24S 0 1 -2.517483e-03 0.3105594 -0.48 0.71 β–†β–‡β–ƒβ–…β–‚
64N-90N 0 1 2.502098e-01 1.0028287 -1.77 3.24 ▂▇▅▂▁
44N-64N 0 1 1.305594e-01 0.5884089 -0.79 1.82 ▆▇▃▃▁
24N-44N 0 1 3.643357e-02 0.4192891 -0.63 1.27 ▃▇▂▂▁
EQU-24N 0 1 6.237762e-02 0.3467758 -0.67 0.97 β–‚β–‡β–†β–ƒβ–‚
24S-EQU 0 1 7.818182e-02 0.3520816 -0.59 1.07 ▃▇▅▃▁
44S-24S 0 1 4.314685e-02 0.3300329 -0.44 0.80 β–‡β–‡β–…β–ƒβ–ƒ
64S-44S 0 1 -5.657343e-02 0.2739233 -0.54 0.44 β–†β–‡β–†β–†β–†
90S-64S 0 1 -8.265734e-02 0.7609316 -2.60 1.22 ▁▂▃▇▅

The data sets are all over 0.99% complete. Handling missing values by dropping them is reasonable rather than imputing them.

Tidying the Data

I’ll note that the data isn’t tidy. In the tidy paradigm, each row corresponds to an observation. Here, multiple observations are included in each row and information is encoded in the column names. This dataset is in a wide format, rather than a long format, presumably to make it easy to compute the yearly and seasonal quarterly data. The tidyr package’s functions pivot_longer (and the converse, pivot_wider) are useful ways to reshape data.

Reshaping Data with pivot_longer

My plan is to look at the global temperature by month and then animate across years.

This is is original wide data.

head(global_temps)
# A tibble: 6 Γ— 19
   Year   Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1  1880 -0.19 -0.25 -0.09 -0.17 -0.1  -0.21 -0.18 -0.11 -0.15 -0.24 -0.22 -0.18
2  1881 -0.2  -0.15  0.03  0.05  0.05 -0.19  0    -0.04 -0.16 -0.22 -0.19 -0.08
3  1882  0.16  0.13  0.04 -0.16 -0.14 -0.22 -0.17 -0.08 -0.15 -0.24 -0.17 -0.36
4  1883 -0.3  -0.37 -0.13 -0.19 -0.18 -0.08 -0.08 -0.14 -0.23 -0.12 -0.24 -0.11
5  1884 -0.13 -0.09 -0.37 -0.4  -0.34 -0.35 -0.31 -0.28 -0.28 -0.25 -0.34 -0.31
6  1885 -0.59 -0.34 -0.27 -0.42 -0.45 -0.44 -0.34 -0.32 -0.29 -0.24 -0.24 -0.11
# β„Ή 6 more variables: `J-D` <dbl>, `D-N` <dbl>, DJF <dbl>, MAM <dbl>,
#   JJA <dbl>, SON <dbl>

I’m dropping the seasonal data and only retaining the monthly data. I then reshape it into three columns: year, month, and change in temperature. I’m dropping the na values; the default is to retain them, so values_drop_na = TRUE should be set explicitly.

global_temps_reshaped <- global_temps %>% select(Year:Dec) %>%
  pivot_longer(Jan:Dec, names_to = "month", values_to = "Delta_temp", values_drop_na = TRUE)
head(global_temps_reshaped)
# A tibble: 6 Γ— 3
   Year month Delta_temp
  <dbl> <chr>      <dbl>
1  1880 Jan        -0.19
2  1880 Feb        -0.25
3  1880 Mar        -0.09
4  1880 Apr        -0.17
5  1880 May        -0.1 
6  1880 Jun        -0.21

This reshaped data has 1721 rows compared to the 144 original rows- definitely longer!

Just double check the completeness…

skim(global_temps_reshaped) 
Data summary
Name global_temps_reshaped
Number of rows 1721
Number of columns 3
_______________________
Column type frequency:
character 1
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
month 0 1 3 3 0 12 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Year 0 1 1951.21 41.41 1880.00 1915.00 1951.00 1987.00 2023.00 β–‡β–‡β–‡β–‡β–‡
Delta_temp 0 1 0.06 0.39 -0.82 -0.22 -0.03 0.29 1.37 ▁▇▃▂▁

Setting Classes and Factor levels

Note that our new month column should be a factor. I need to provide the levels otherwise ggplot will plot the months in alphabetical order. It turns out that base R has a few built in constants and abbreviated months is one of them. (English only, I’m sorry.) This saves some typing!

global_temps_reshaped$month <-
  factor(global_temps_reshaped$month, levels = month.abb)

Year should also be an integer. This isn’t super critical for regular ggplot graphs, but for the animation, weird interpolated year values are displayed.

global_temps_reshaped$Year <- as.integer(global_temps_reshaped$Year)

Animated Plot of Temperature Change

The gganimate package page has clear examples that get you to a basic plot very quickly. The first part of the code is regular ggplot and the gganimate code specifies what other variable to scan over.

First Draft

global_temps_reshaped %>%
  ggplot(aes(month, Delta_temp)) + geom_col() +
  labs(title = "Year: {frame_time}", x = "Month", y = "Delta Temp") +
  transition_time(Year) 

Polishing the Graph

Now polish this up.

I’m going to set the scale to be fixed. This will make year over year comparisons easier. I’m also making it symmetric to illustrate that the deviations are much larger in one direction. I will also add a continuous color scale to make the changes even more obvious. I always use the viridis package whenever possible since it creates scales that easier to read for those with color blindness.

I’m going to tweak on a static version for speeds sake. The unicode items (e.g.Β \U0394) are to add various symbols. You can look them up here; you just need to remove the plus and add it with a \ in your code.

global_temps_reshaped %>%  filter(Year == 1890) %>%
  ggplot(aes(month, Delta_temp, fill = Delta_temp)) + geom_col() +
  scale_fill_viridis(option = "turbo", name = "\U0394 T (\U00B0 C)") +
  ylim(-1.5, 1.5) +
  labs(x = "Month", y = "\U0394 Temperature (\U00B0 C)") +
  labs(title = "Global Deviations in Temperature") +
  labs(subtitle = "Year: 1890") +
  labs(caption =  "Data from: NASA GISTEMP v4 via #TidyTuesday") +
  theme_pander(12)

Final Version

I went back and forth between the pander and the classic theme for the graph. Both have a white background, but pander has a grid and classic doesn’t. I decided on classic in the end. I ended up moving the caption to the left; when it was right aligned it looked like it was supposed to be aligned with the x axis label, but wasn’t.

global_temps_reshaped %>%
  ggplot(aes(month, Delta_temp, fill = Delta_temp)) + geom_col() +
  scale_fill_viridis(option = "turbo", name = "\U0394 T (\U00B0 C)") +
  ylim(-1.4, 1.4) +
  labs(x = "Month", y = "\U0394 Temperature (\U00B0 C)") +
  labs(caption =  "Data from: NASA GISTEMP v4 via #TidyTuesday") +
  theme_classic(12) +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(plot.caption = element_text(hjust = 0)) +
  labs(title = "Global Deviations in Temperature", subtitle = "Year: {frame_time}") +
  transition_time(Year)

And saving the figure.

anim_save("thumbnail.gif")

Citation

BibTeX citation:
@online{e. sinks2023,
  author = {E. Sinks, Louise},
  title = {TidyTuesday {Week} 28: {Global} {Surface} {Temperature}},
  date = {2023-07-11},
  url = {https://lsinks.github.io/posts/2023-07-11-tidytuesday-temps/temperatures},
  langid = {en}
}
For attribution, please cite this work as:
E. Sinks, Louise. 2023. β€œTidyTuesday Week 28: Global Surface Temperature.” July 11, 2023. https://lsinks.github.io/posts/2023-07-11-tidytuesday-temps/temperatures.
To leave a comment for the author, please follow the link and comment on their blog: Louise E. Sinks.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)