Skip to content

Instantly share code, notes, and snippets.

@skranz
Created May 16, 2014 17:55
Show Gist options
  • Select an option

  • Save skranz/b22b60a83f5c6ab334f7 to your computer and use it in GitHub Desktop.

Select an option

Save skranz/b22b60a83f5c6ab334f7 to your computer and use it in GitHub Desktop.

Revisions

  1. skranz created this gist May 16, 2014.
    82 changes: 82 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,82 @@
    # S3 generic: a mutate variant that accepts an optional row condition.
    #
    # .data   object to dispatch on (methods below cover data.frame and
    #         data.table)
    # .where  optional row condition; how it is interpreted is up to the method
    # ...     passed on to the selected method
    mutate2 <- function(.data, .where, ...) UseMethod("mutate2")

    # mutate2 method for plain data.frames.
    #
    # .data   a data.frame
    # .where  optional unquoted row condition (e.g. a == 3). It is captured
    #         unevaluated with substitute() and handed to
    #         mutate_where.data.table() as a call.
    # ...     named expressions of the form name = value
    #
    # Returns a data.frame. With `.where`, the work is done on a data.table
    # conversion of `.data`; without it, the call is delegated to dplyr's
    # mutate() on a tbl_df.
    mutate2.data.frame <- function(.data, .where, ...) {
      if (!missing(.where)) {
        .where <- substitute(.where)
        # Fix: convert the `.data` argument. The previous code converted `df`,
        # a name that is never defined in this function.
        dt <- as.data.table(.data)
        # `dt` is a local conversion result, so it is modified in place
        # (inplace = TRUE) instead of being copied a second time.
        dt <- mutate_where.data.table(
          .data = dt, .where.call = .where, ..., inplace = TRUE
        )
        return(as.data.frame(dt))
      }

      tbl <- tbl_df(.data)
      # Call the generic rather than the internal method mutate.tbl_df();
      # S3 dispatch on `tbl` selects the appropriate method.
      res <- mutate(tbl, ...)
      as.data.frame(res)
    }

    # mutate2 method for data.tables.
    #
    # .data    a data.table
    # .where   optional unquoted row condition; captured with substitute() and
    #          forwarded to mutate_where.data.table()
    # ...      named expressions of the form name = value
    # inplace  if FALSE (default) `.data` is copied first, so the caller's
    #          table is left untouched; if TRUE the assignments are applied to
    #          `.data` itself via `:=`
    #
    # Returns the (possibly copied) data.table after all assignments.
    mutate2.data.table <- function(.data, .where, ..., inplace = FALSE) {
      if (!inplace) {
        .data <- copy(.data)
      }

      if (!missing(.where)) {
        .where <- substitute(.where)
        # Fix: forward `.data`. The previous code passed `dt`, which is not a
        # local here; it either failed with "object not found" or picked up an
        # unrelated `dt` from an enclosing environment and modified that one.
        # The copy (if requested) was already made above, hence inplace = TRUE.
        return(mutate_where.data.table(
          .data = .data, .where.call = .where, ..., inplace = TRUE
        ))
      }

      # Evaluate each assignment in a small environment that exposes the table
      # as `data` and otherwise falls back to the caller's frame.
      env <- new.env(parent = parent.frame(), size = 1L)
      env$data <- .data
      cols <- named_dots(...)
      for (i in seq_along(cols)) {
        # Builds data[, `:=`(<name>, <expr>)] for the i-th expression.
        call <- substitute(
          data[, `:=`(lhs, rhs)],
          list(lhs = as.name(names(cols)[[i]]), rhs = cols[[i]])
        )
        eval(call, env)
      }
      .data
    }


    # Apply named assignments to the rows of a data.table selected by a
    # condition that is supplied as an already-captured call.
    #
    # .data        a data.table
    # .where.call  unevaluated row condition, placed in the `i` position of
    #              data[i, j]
    # ...          named expressions; named_dots() is presumably expected to
    #              return them unevaluated with their names (helper not defined
    #              in this file; verify)
    # inplace      if FALSE (default) work on a copy of `.data`
    #
    # Returns the data.table after all assignments.
    mutate_where.data.table <- function(.data, .where.call, ..., inplace = FALSE) {
      if (!inplace) {
        .data <- copy(.data)
      }

      # The table is exposed under the name `data`; everything else is looked
      # up in the caller's frame.
      eval_env <- new.env(parent = parent.frame(), size = 1L)
      assign("data", .data, envir = eval_env)

      exprs <- named_dots(...)
      for (k in seq_along(exprs)) {
        # Builds data[<condition>, `:=`(<name>, <expr>)] for the k-th expression.
        assignment <- substitute(
          data[row_filter, `:=`(lhs, rhs)],
          list(
            lhs = as.name(names(exprs)[[k]]),
            rhs = exprs[[k]],
            row_filter = .where.call
          )
        )
        eval(assignment, eval_env)
      }

      .data
    }



    # Usage example: runs mutate2() on a data.frame and a data.table, then
    # times it against the equivalent ifelse()-based mutate() calls.
    # Returns the microbenchmark result.
    examples <- function() {
      library(microbenchmark)
      #library(modify)
      library(dplyr)
      library(pryr)
      library(data.table)

      # Benchmark compared to directly using data.table or dplyr
      n <- 1e6
      df <- data.frame(
        a = sample(1:5, n, replace = TRUE),
        b = sample(1:100, n, replace = TRUE),
        x = rnorm(n)
      )
      dt <- as.data.table(df)

      # Conditional update: add 100 to x only where a == 3.
      res1 <- mutate2(df, a == 3, x = x + 100)
      res2 <- mutate2(dt, a == 3, x = x + 100)

      microbenchmark(
        #modify(dt,a==2, x = x+100),
        mutate(dt, x = ifelse(a == 2, x + 100, x)),
        mutate2(dt, a == 2, x = x + 100),
        mutate(df, x = ifelse(a == 2, x + 100, x)),
        mutate2(df, a == 2, x = x + 100),
        times = 5L
      )
    }