% str_remove('Eliminated ') %>% as.numeric() ) %>% # Remove Contestants that Withdraw filter(!str_detect(result, 'Withdrew')) %>% group_by(season) %>% # Add the number of contestants for each season mutate(n_contestants = n()) %>% ungroup() %>% #Overwrite Places for 1st/2nd/3rd mutate( place = case_when( str_detect(result, "Winner") ~ 1, str_detect(result, "Runner|Second") ~ 2, str_detect(result, "Third") ~ 3, str_detect(result, "Fourth") ~ 4, TRUE ~ n_contestants - eliminated_state + 1 ), # Standardize What Contestants Are "Known For" known_for = case_when( str_detect(str_to_lower(notability_known_for), 'actor|actress|disney') ~ 'Actor/Actress', str_detect(str_to_lower(notability_known_for), 'singer|rapper|band|composer') ~ 'Musician', str_detect(str_to_lower(notability_known_for), 'model|miss usa') ~ 'Model', str_detect(str_to_lower(notability_known_for), 'nhl|nfl|nba|boxer|olympi|diva|tennis|soccer|football|lakers|swim|ufc|nascar|snowboard|wwe|mlb|basketball|rodeo|skier|race car|jockey|dolphins|steelers|packers|lakers|indy 500') ~ 'Athlete', str_detect(str_to_lower(notability_known_for), 'journ|anchor|host|caster|personality') ~ 'Media Personality', str_detect(str_to_lower(notability_known_for), 'bachelor|star|chef') ~ 'Reality TV Star', str_detect(str_to_lower(notability_known_for), 'comedian|magician|entertainer') ~ 'Entertainer', str_detect(str_to_lower(notability_known_for), 'owner|co-founder|business|designer') ~ 'Businessperson', TRUE ~ "Other" ) ) %>% # Fix Celebrity Column for Season 29 mutate(celebrity = if_else(is.na(celebrity), celebrity_12_13, celebrity)) %>% # Remove Unneeded Columns select(-contains('professional'), -ref, -status, -eliminated_state, -celebrity_12_13) %>% #Want Scores to be between 0 and 1 where 1 is Last Place and 0 is first place. mutate(scaled_place = (place-1)/(n_contestants-1)) The scaled_place variable will be used to create a standardized density plot by putting each season on a 1 (Last Place) to 0 (1st Place) scale regardless of the number of contestants in the season. The cleaned data now looks like: celebrity notability_known_for season result status_date n_contestants place known_for scaled_place Trista Sutter The Bachelorette star 1 Eliminated 1ston June 8, 2005 2005-06-08 6 6 Reality TV Star 1.0 Evander Holyfield Heavyweight boxer 1 Eliminated 2ndon June 15, 2005 2005-06-15 6 5 Athlete 0.8 Rachel Hunter Supermodel 1 Eliminated 3rdon June 22, 2005 2005-06-22 6 4 Model 0.6 Joey McIntyre New Kids on the Block singer 1 Third placeon June 29, 2005 2005-06-29 6 3 Musician 0.4 John O’Hurley Actor & game show host 1 Runner-upon July 6, 2005 2005-07-06 6 2 Actor/Actress 0.2 Using Regular Expressions, I’ve collapsed 237 different levels into 9 which are: Profession Examples Actor/Actress Zendaya, Alexa PenaVega, Amber Riley Athlete Jamie Anderson, Antonio Brown, Martina Navratilova Businessperson Steve Wozniak, Robert Herjavec, Mark Cuban Entertainer Penn Jillette, Marie Osmond, Margaret Cho Media Personality Jerry Springer, Bobby Bones, Giselle Fernandez Model Bonner Bolton, Shandi Finnessey, Sailor Brinkley-Cook Musician Joey McIntyre, Gavin DeGraw, Nick Carter Other Sean Spicer, Buzz Aldrin, Noah Galloway Reality TV Star The Situation, Lisa Vanderpump, Terra Jolé Constructing The Table Organizing the Data For the table, the information we want is for each “Profession”: How many contestants were there? What percentages came in 1st, 2nd, 3rd, and Last? Some quick dplyr magic will allow us to collapse the list of contestants into the structure we want. We’ll also set the order of the table by the descending percentage of first place wins by “profession”. contestant_summary % group_by(known_for) %>% summarize( num_stars = n(), pct_1st_place = sum(place == 1)/n(), pct_2nd_place = sum(place == 2)/n(), pct_3rd_place = sum(place == 3)/n(), pct_last_place = sum(n_contestants == place) / n() ) %>% arrange(-pct_1st_place) Using {gt} to Build the Table Now onto actually constructing the table with gt. The gt package provides a grammar for tables similar to what ggplot2 does for charts. The package provides this visualization to show the different parts of a table: Step 1: The basic construction The most basic construction of a table is done by using the gt() function. (g1 % tab_options( table.border.top.color = "white", data_row.padding = px(0), ) %>% cols_width( 1 ~ px(200), ) ) Most Successful Dancing With the Stars "Professions" Covering Seasons 1 to 29 (excluding All-Star Season 15) # ⭐s Distribution of Results 🥇(1st) 🥈(2nd) 🥉(3rd) 💩 (last) Athlete 79 13.9% 10.1% 6.3% 10.1% Musician 38 7.9% 10.5% 15.8% 13.2% Actor/Actress 130 7.7% 9.2% 6.9% 4.6% Reality TV Star 26 7.7% 7.7% 7.7% 3.8% Model 14 7.1% 7.1% 0% 7.1% Media Personality 21 4.8% 4.8% 4.8% 23.8% Businessperson 5 0% 0% 0% 20% Entertainer 5 0% 0% 20% 40% Other 9 0% 0% 44.4% 0% Data: DWTS Wikipedia Articles | Table Author: JLaw Step 12: Adding a Color Scale for the % Columns The data_color function allows for doing conditional formatting based on the values in the columns. The columns argument allows to specific which colors should receive the formatting. The colors argument defines the palette. And the apply_to argument can take the values of “fill” to fill the background or “text” to change the color of the text. (g12 % data_color( columns = vars(pct_1st_place, pct_2nd_place, pct_3rd_place, pct_last_place), colors = scales::col_numeric( palette = c("white", "#3fc1c9"), #F2CB05 = Gold COlor domain = NULL ), apply_to = "fill", ) ) Most Successful Dancing With the Stars "Professions" Covering Seasons 1 to 29 (excluding All-Star Season 15) # ⭐s Distribution of Results 🥇(1st) 🥈(2nd) 🥉(3rd) 💩 (last) Athlete 79 13.9% 10.1% 6.3% 10.1% Musician 38 7.9% 10.5% 15.8% 13.2% Actor/Actress 130 7.7% 9.2% 6.9% 4.6% Reality TV Star 26 7.7% 7.7% 7.7% 3.8% Model 14 7.1% 7.1% 0% 7.1% Media Personality 21 4.8% 4.8% 4.8% 23.8% Businessperson 5 0% 0% 0% 20% Entertainer 5 0% 0% 20% 40% Other 9 0% 0% 44.4% 0% Data: DWTS Wikipedia Articles | Table Author: JLaw This looks pretty good… but we can do better!!! Turning it up to 11 by adding in Density Plots In order to add in ggplots into a row in the table we need to: Build a function to create the plot for each row of the table Use purrr:map() to add the plot as a list-column to the table Use gt::text_transform to insert the image into the table NOTE: Since this required making a new data set much of the gt code is repeating the first section but is provided in its entirely for completeness. Writing the function to build the chart For the function I want it to take a “profession” and return a density part using the scaled_place variable defined at the top. The function takes in a profession label and a dataset and returns a density plot. plot_dens % unique, plot_dens, data = contestant_clean)) %>% left_join( ###Add in Recent Winner Images contestant_clean %>% filter(place == 1) %>% group_by(known_for) %>% slice_max(season, n = 1) %>% select(celebrity, season, known_for) %>% ungroup() %>% transmute( known_for, lbl = paste0(celebrity,' (Season ',season,")") ) ) Creating the Final Table In order to turn the plots into columns the text_transform() function is used to take the plots column and run a function that calls ggplot_image with certain height and aspect ratio parameters on each row in the table. text_transform( locations = cells_body(vars(plots)), fn = function(x) { map(contestant_summary_with_graph$plots, ggplot_image, height = px(120), aspect_ratio = 1.5) } ) Now we can put it all together. Besides adding in the plots, there’s a few steps to format the Most Recent Winner cell. But nothing that hasn’t been covered earlier. #Base Table gt(contestant_summary_with_graph) %>% #Add Titles tab_header( title = html('Most Successful Dancing With the Stars "Professions"'), subtitle = html( "Covering Seasons 1 to 29 (excluding All-Star Season 15)" ) ) %>% #Format Title tab_style( style = cell_text( font = google_font("Anton"), align = "left", size = "xx-large" ), locations = cells_title("title") ) %>% #Format Subtitle tab_style( style = cell_text( font = google_font("Caveat"), align = "left", size = "x-large" ), locations = cells_title("subtitle") ) %>% #Adding Spanning Column tab_spanner( label = "Distribution of Results", columns = 3:7 ) %>% #Style The Spanner Column tab_style( style = cell_text( font = google_font("Courgette"), size = "medium", weight = "bold" ), locations = cells_column_spanners("Distribution of Results") ) %>% #Style the Column Labels and Profession Column tab_style( style = cell_text( font = google_font("Secular One"), size = "large" ), locations = list( cells_column_labels(everything()), cells_body(columns = 1) ) ) %>% #Style the Cells tab_style( style = cell_text( font = google_font("Spartan"), size = "medium", align = 'center' ), locations = cells_body(columns = 2:6) ) %>% #Format Cells to %s fmt_percent( columns = starts_with('pct'), decimals = 1, drop_trailing_zeros = TRUE ) %>% #Turn Headers to Emojis cols_label( known_for = "", num_stars = paste0("# ",emo::ji('star'), "s"), pct_1st_place = paste0(emo::ji("1st_place_medal"), "(1st)"), pct_2nd_place = paste0(emo::ji("2nd_place_medal"), "(2nd)"), pct_3rd_place = paste0(emo::ji("3rd_place_medal"), "(3rd)"), pct_last_place = paste0(emo::ji("poo"), " (last)"), plots = "", lbl = "Most Recent Winner" ) %>% ###Add in Source and Doing Some Minor Formatting tab_source_note(md("**Data:** DWTS Wikipedia Articles | **Table Author:** JLaw")) %>% tab_options( table.border.top.color = "white", data_row.padding = px(0), ) %>% cols_width( 1 ~ px(200) ) %>% ###Add a Color Scale for 1st Place data_color( columns = vars(pct_1st_place, pct_2nd_place, pct_3rd_place, pct_last_place), colors = scales::col_numeric( palette = c("white", "#3fc1c9"), #F2CB05 = Gold COlor domain = NULL ), apply_to = "fill", ) %>% ######################NEW THINGS START HERE######################### # Add In Density Plots (NEW) text_transform( locations = cells_body(vars(plots)), fn = function(x) { map(contestant_summary_with_graph$plots, ggplot_image, height = px(120), aspect_ratio = 1.5) } ) %>% text_transform( locations = cells_body(vars(lbl)), fn = function(x){ if_else(!is.na(x), str_replace_all(x, " \\(", " \\("), "") } ) %>% tab_style( style = cell_text( style = 'italic', size = px(13), v_align = 'middle', align = 'left' ), locations = cells_body(columns = vars(lbl)) ) %>% cols_width( 8 ~ px(100) ) Most Successful Dancing With the Stars "Professions" Covering Seasons 1 to 29 (excluding All-Star Season 15) # ⭐s Distribution of Results Most Recent Winner 🥇(1st) 🥈(2nd) 🥉(3rd) 💩 (last) Athlete 79 13.9% 10.1% 6.3% 10.1% Adam Rippon (Season 26) Musician 38 7.9% 10.5% 15.8% 13.2% Kellie Pickler (Season 16) Actor/Actress 130 7.7% 9.2% 6.9% 4.6% Jordan Fisher (Season 25) Reality TV Star 26 7.7% 7.7% 7.7% 3.8% Kaitlyn Bristowe (Season 29) Model 14 7.1% 7.1% 0% 7.1% Brooke Burke (Season 7) Media Personality 21 4.8% 4.8% 4.8% 23.8% Bobby Bones (Season 27) Businessperson 5 0% 0% 0% 20% Entertainer 5 0% 0% 20% 40% Other 9 0% 0% 44.4% 0% Data: DWTS Wikipedia Articles | Table Author: JLaw So what is the most successful “profession” in DWTS? Seems pretty clearly to be the athletes as close to 14% of the Athletes have wound up winning. On the other end of the spectrum, the Media Personalities have faired less well with the lowest winning percentage of a group with 10+ stars and nearly 1 in 4 coming in last place… Reality TV Stars, while in the middle of the pack has been surging with ex-Bachelorettes winning the last two seasons (28 and 29). " />

What’s the most successful Dancing With the Stars “Profession”? Visualizing with {gt}

[This article was first published on R | JLaw's R Blog, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

To leave a comment for the author, please follow the link and comment on their blog: R | JLaw's R Blog.

R-bloggers.com offers daily e-mail updates about R news and tutorials about learning R and many other topics. Click here if you're looking to post or find an R/data-science job.
Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Never miss an update!
Subscribe to R-bloggers to receive
e-mails with the latest R posts.
(You will not see this message again.)

Click here to close (This popup will not appear again)