Want to share your content on R-bloggers? click here if you have a blog, or here if you don't.

Week 47 — Puzzles no. 329–333

#### Puzzles

Author: ExcelBI

Puzzles:
# 329: content file
# 330: content file
# 331: content file
# 332: content file
# 333: content file

Lets dive into solutions!

### Puzzle 329

In this puzzle we were asked to generate first 50 elements of Iccanobif sequence. Wait what? It is Fibonacci backwords… and that is exactly our goal to generate Fibonacci like sequence with one constraint: next element is a sum of previous two but with digits in reversed order.
As it looks hard, it really isn’t. And this solution is so short in base R, that translating it to another syntax would be just over complicating.

library(tidyverse)
library(stringi)

test = read_excel("Iccanobif Numbers.xlsx", range = "A2:A51", col_names = FALSE) %>% pull()

#### Approach: Base R

reverse_digits <- function(n) {
as.numeric(stri_reverse(as.character(n)))
}

generate_iccanobif <- function(N) {
iccanobif <- c(0, 1)
for (i in 3:N) {
next_term <- reverse_digits(iccanobif[i - 1]) + reverse_digits(iccanobif[i - 2])
iccanobif <- c(iccanobif, next_term)
}
iccanobif
}

iccanobif_50 <- generate_iccanobif(50)

#### Validation

identical(iccanobif_50, test)
#> [1] TRUE

### Puzzle 330

In puzzle #330 we have sentences in table and our goal is to return matrix populated rowwise containing only words that were longer than average word in original sentence.

library(tidyverse)
library(tidytext)
library(data.table)

input = read_excel("Average Word Length.xlsx", range= "A1:A10")
test = read_excel("Average Word Length.xlsx", range = "B2:D5", col_names = F)
colnames(test) = c("1", "2", "3")

#### Approach 1: tidyverse

result = input %>%
mutate(number = row_number()) %>%
unnest_tokens(word, Books) %>%
group_by(number) %>%
mutate(word_len = nchar(word)) %>%
reframe(number, word, word_len, avg_len = mean(word_len)) %>%
ungroup() %>%
filter(word_len > avg_len) %>%
select(word) %>%
mutate(group = rep(1:4, each = 3)) %>%
group_by(group) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = word) %>%
ungroup() %>%
select(-group) %>%
mutate(across(everything(), ~ str_to_title(.x)))

#### Approach 2: data.table

input_dt <- setDT(copy(input))

input_dt[, number := .I]
input_dt_long <- input_dt[, .(word = unlist(strsplit(Books, " "))), by = number]
input_dt_long[, word_len := nchar(word)]
input_dt_long[, avg_len := mean(word_len), by = number]

input_dt_filtered <- input_dt_long[word_len > avg_len, .(word, number)]
input_dt_filtered[, group := rep(1:4, each = 3, length.out = .N)]
input_dt_filtered[, row := rep(1:3, times = 4, length.out = .N)]
wider_dt <- dcast(input_dt_filtered, group ~ row, value.var = "word")
setnames(wider_dt, old = names(wider_dt), new = c("group", "1", "2", "3"))

wider_dt[, group := NULL]

#### Approach 3: base R

input_base <- input
input_base\$number = seq_along(input_base\$Books)
words_list_base = strsplit(input_base\$Books, " ")
words_df_base = setNames(data.frame(do.call(rbind, lapply(words_list_base, function(x) data.frame(word = x))), row.names = NULL), c("word"))
words_df_base\$number = rep(input_base\$number, sapply(words_list_base, length))
words_df_base\$word_len = nchar(as.character(words_df_base\$word))
words_df_base\$avg_len = ave(words_df_base\$word_len, words_df_base\$number, FUN = mean)
filtered_df_base = subset(words_df_base, word_len > avg_len, select = c("word", "number"))
filtered_df_base\$group = rep(1:4, each = 3, length.out = nrow(filtered_df_base))
mat = matrix(filtered_df_base\$word, nrow = 4, ncol = 3, byrow = TRUE)
wider_df_base = as_tibble(as.data.frame(mat))
colnames(wider_df_base) = c("1", "2", "3")
wider_df_base[] = lapply(wider_df_base, function(x) if(is.character(x)) tools::toTitleCase(x) else x)

#### Validation

identical(test, result)
#> [1] TRUE

identical(test, wider_dt)
#> [1] TRUE

identical(test, wider_df_base)
#> [1] TRUE

### Puzzle 331

In this puzzle we were asked to find which of given numbers can be represented as sum of two cubes.

library(tidyverse)
library(data.table)

input = read_excel("Sum of Cube of Two Numbers.xlsx", range = "A1:A10")
test = read_excel("Sum of Cube of Two Numbers.xlsx",
range = "B2:D6",
col_names = c("Number", "Factor1", "Factor2"))

#### Approach 1: tidyverse

check_if_sum_of_cubes = function(number) {
x = floor(number^(1/3))

range = data.frame(Number = number, Factor1 = 1:x) %>%
mutate(diff = number - Factor1^3,
is_cube = round(diff^(1/3))^3 == diff) %>%
filter(is_cube) %>%
mutate(Factor2 = diff^(1/3)) %>%
slice(1) %>%
select(Number, Factor1, Factor2)
return(range)
}

result = map_dfr(input\$Number, check_if_sum_of_cubes) %>% as_tibble()

#### Approach 2: data.table

check_if_sum_of_cubes_dt <- function(number) {
x <- floor(number^(1/3))

DT <- CJ(Factor1 = 1:x, Number = number)
DT[, `:=`(diff = Number - Factor1^3)]
DT[, `:=`(is_cube = round((diff)^(1/3))^3 == diff,
Factor2 = (diff)^(1/3))]

result <- DT[is_cube == TRUE, .(Number, Factor1, Factor2)]
result <- result[order(Factor1)][1]

return(result)
}

input_numbers_dt <- as.vector(input\$Number)
result_dt <- rbindlist(lapply(input_numbers_dt, check_if_sum_of_cubes_dt))
result_dt <- as.data.frame(result_dt) %>% drop_na()

#### Approach 3: base R

check_if_sum_of_cubes_base <- function(number) {
x <- floor(number^(1/3))

df <- expand.grid(Factor1 = 1:x, Number = number)
df\$diff <- df\$Number - df\$Factor1^3
df\$is_cube <- round(df\$diff^(1/3))^3 == df\$diff
df\$Factor2 <- df\$diff^(1/3)

result <- df[df\$is_cube, ]
result <- result[order(result\$Factor1), ]
result <- result[1, c("Number", "Factor1", "Factor2")]

return(result)
}

input_numbers <- as.vector(input\$Number)
result_base <- do.call("rbind", lapply(input_numbers, check_if_sum_of_cubes_base))
result_base <- as.data.frame(result_base) %>% drop_na()

#### Validation

Sometimes aligning all structures to be identical generate to much code. Then I we can also check it visually.

> test
# A tibble: 5 × 3
Number Factor1 Factor2
<dbl>   <dbl>   <dbl>
1       35     2         3
2      855     7         8
3     3744    10        14
4   300827     4        67
5 90000576    44.0     448

> result
# A tibble: 5 × 3
Number Factor1 Factor2
<dbl>   <int>   <dbl>
1       35       2       3
2      855       7       8
3     3744      10      14
4   300827       4      67
5 90000576      44     448

> result_base
Number Factor1 Factor2
2        35       2       3
7       855       7       8
10     3744      10      14
4    300827       4      67
44 90000576      44     448

> result_dt
Number Factor1 Factor2
1       35       2       3
2      855       7       8
3     3744      10      14
4   300827       4      67
5 90000576      44     448

### Puzzle 332

One of the shortest solutions which I solved so far, but it is linked to the fact that there were new functions introduced in MS Excel. We had to find maximal and minimal value per group.

library(tidyverse)
library(data.table)

input = read_excel("Max Min.xlsx", range = "A1:B20")
test = read_excel("Max Min.xlsx", range = "D2:F6")

#### Approach 1: tidyverse

result = input %>%
group_by(Zone) %>%
summarise(Max = max(Sales), Min = min(Sales))

#### Approach 2: data.table

input_dt = setDT(input)

result_dt = input_dt[, .(Max = max(Sales), Min = min(Sales)), by = Zone][order(Zone)]
result_dt = as_tibble(result_dt)

#### Approach 3: base R

result_base <- do.call(data.frame, aggregate(Sales ~ Zone, data = input, FUN = function(x) c(Max = max(x), Min = min(x))))
names(result_base) <- c("Zone","Max", "Min")

#### Validation

identical(test, result)
#> [1] TRUE

identical(test, result_dt)
#> [1] TRUE

identical(test, as_tibble(result_base))
#> [1] TRUE

### Puzzle 333

As we all know words consists of letters, and in many words letters repeat itself. And that is the topic in this puzzle. We have words and we need to cut them… before second occurence of first repeating letter (bull becomes bul, excel becomes exc and so on).

library(tidyverse)
library(data.table)

input = read_excel("Extract String Before a Repeated Character.xlsx", range = "A1:A10")
test = read_excel("Extract String Before a Repeated Character.xlsx", range = "B1:B10")

#### Approach 1: tidyverse

result = input %>%
mutate(lett = str_split(str_to_lower(String), pattern = ""),
reps = map(lett, ~ duplicated(.x)),
first_reps = map_int(reps, ~ which(.x)[1]),
str_sub(String, 1, first_reps-1),
String)) %>%

#### Approach 2: data.table

input_dt <- as.data.table(input)

input_dt[, lett := strsplit(tolower(String), "")]

input_dt[, reps := lapply(lett, function(x) duplicated(x))]
input_dt[, first_reps := sapply(reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })]
input_dt[, `Answer Expected` := ifelse(!is.na(first_reps), substr(String, 1, first_reps - 1), String)]

#### Approach 3: base R

input_b <- input
input_b\$lett <- strsplit(tolower(input_b\$String), "")
input_b\$reps <- lapply(input_b\$lett, function(x) duplicated(x))
input_b\$first_reps <- sapply(input_b\$reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })
input_b\$`Answer Expected` <- ifelse(!is.na(input_b\$first_reps), substr(input_b\$String, 1, input_b\$first_reps - 1), input_b\$String)

#### Validation

identical(result, test)
#> [1] TRUE

#> [1] TRUE

#> [1] TRUE

### Conclussion

Feel free to comment, share and contact me with advices, questions and your ideas how to improve anything.

R Solution for Excel Puzzles was originally published in Numbers around us on Medium, where people are continuing the conversation by highlighting and responding to this story.