library(dplyr)
library(purrr)

# helper to make examples
new_tbl <- function(n_groups, row_range = 5:10) {

  1:n_groups %>%
    map_df(~tibble(
      group_id = .x,
      value    = rnorm(sample.int(row_range, 1L))
    ))
}

# take the row with biggest value in some column
get_max_slicer <- function(col, time) {
  
  col <- enquo(col)
  
  function(tbl) {

    Sys.sleep(time)
    
    tbl %>%
      arrange(desc(!!col)) %>%
      slice(1L)
  }
}

#### functions ####

# sequential
do_sequential <- function(tbl, func) {
  
  tbl %>%
    group_by(group_id) %>%
    do(func(.))
}

# naive-but-easy parallel
library(future)
plan(multiprocess)

do_small_groups <- function(tbl, func) {
  
  tbl %>%
    split(.$group_id) %>%
    map(~future(
      func(.x)
    )) %>%
    values() %>%
    bind_rows()
}

# smarter, probably
do_big_groups <- function(tbl, func, cores = availableCores()) {
  
  tbl %>%
    split(.$group_id %% cores) %>%  # relies on group_id being contiguous ints
    map(~future(
      do_sequential(.x, func)
    )) %>%
    values() %>%
    bind_rows()
}

#### validate ####

tbl    <- new_tbl(10L)
slicer <- get_max_slicer(value, 0.1)

funcs <- lst(
  do_sequential,
  do_small_groups,
  do_big_groups
)

results <- map(funcs, ~.x(tbl, slicer))

# compare each result to the one before
map2_lgl(
  results[-1], results[-length(results)],
  all_equal
) %>%
  all() %>%
  stopifnot()


#### benchmarking ####

library(microbenchmark)
library(tidyr)
library(ggplot2)

timer <- function(groups, time, functions, repeats = 3L) {
  
  tbl <- new_tbl(groups)
  f   <- get_max_slicer(value, time)
  
  functions %>%
    map_dfr(
      ~microbenchmark(.x(tbl, f), times = repeats, unit = "s") %>% summary(),
      .id = "func"
    ) %>%
    select(-expr)
}

results <- list(
  iter_time = c(0.01, 0.1, 1),
  n_groups  = c(10L, 20L, 40L)
) %>%
  cross_df() %>%
  mutate(timings = map2(n_groups, iter_time, timer, funcs)) %>%
  unnest(timings)

plot_times <- function(tbl) {
  
  tbl %>%
    ggplot(aes(x = n_groups, y = mean, color = func)) +
    facet_wrap(~iter_time) +
    scale_x_continuous(breaks = unique(tbl$n_groups)) +  # must be a simpler way?
    geom_line() + geom_point()
}

results %>%
  mutate(func = factor(func, names(funcs))) %>%
  plot_times()

# more time, parallel options only

p_results <- list(
  iter_time = c(0.05, 1, 2),
  n_groups  = c(10L, 100L, 1000L)
) %>%
  cross_df() %>%
  mutate(timings = map2(n_groups, iter_time, timer, funcs[-1])) %>%
  unnest(timings)

p_results %>%
  mutate(func = factor(func, names(funcs))) %>%
  plot_times()