[This article was first published on S+/R – Yet Another Blog in Statistical Computing, and kindly contributed to R-bloggers]. (You can report an issue about the content on this page here.) Want to share your content on R-bloggers? Click here if you have a blog, or here if you don't. pkgs <- list("hflights", "doParallel", "foreach", "dplyr", "rbenchmark", "data.table") lapply(pkgs, require, character.only = T) data(hflights) benchmark(replications = 10, order = "user.self", relative = "user.self", transform = { ### THE GENERIC FUNCTION MODIFYING THE DATA.FRAME, SIMILAR TO DATA.FRAME() ### transform(hflights, wday = ifelse(DayOfWeek %in% c(6, 7), 'weekend', 'weekday'), delay = ArrDelay + DepDelay) }, within = { ### EVALUATE THE EXPRESSION WITHIN THE LOCAL ENVIRONMENT ### within(hflights, {wday = ifelse(DayOfWeek %in% c(6, 7), 'weekend', 'weekday'); delay = ArrDelay + DepDelay}) }, mutate = { ### THE SPECIFIC FUNCTION IN DPLYR PACKAGE TO ADD VARIABLES ### mutate(hflights, wday = ifelse(DayOfWeek %in% c(6, 7), 'weekend', 'weekday'), delay = ArrDelay + DepDelay) }, foreach = { ### SPLIT AND THEN COMBINE IN PARALLEL ### registerDoParallel(cores = 2) v <- c(names(hflights), 'wday', 'delay') f <- expression(ifelse(hflights$DayOfWeek %in% c(6, 7), 'weekend', 'weekday'), hflights$ArrDelay + hflights$DepDelay) df <- foreach(fn = iter(f), .combine = mutate, .init = hflights) %dopar% { [...]
[Read more...]