# lazy data frame # # like idata.frame, but exploits lazy evaluation + macro code generation # instead of active bindings library(vadr) lazy.frame <- function(df, enclos=parent.frame(), ...) UseMethod("lazy.frame") lazy.frame.lazy.frame <- function(df, ...) df lazy.frame.environment <- function(df, ..., col.order=ls(df)) { #not kosher, environment attrs get set by reference. attr(df, "accessory") <- accessory_env(df, col.order) class(df) <- union("lazy.frame", class(df)) df } lazy.frame.data.frame <- function(df, enclos=parent.frame()) { for (n in names(df)) if (is.array(df[[n]])) stop("Array column '", n, "' not supported") col.order <- structure(names(df), names=names(df)) e <- list2env(df, parent=enclos) attr(e, "accessory") <- accessory_env(e, col.order) class(e) <- c("lazy.frame", "environment") e } lazy.frame.default <- function(df, enclos=parent.frame()) lazy.frame(as.data.frame(df), enclos) accessory_env <- function( #Holds reference to e, and subsetting methods. #these are lazy, created on demand. #also note accessory env always inherits from package, so that #data env is allowed to inherit from wherever e, col.order, #use do.call because this lets "macro" memoize on the col names row_subset = do.call(row_subsetter, as.list(col.order)), col_subset = do.call(col_subsetter, as.list(col.order)), dim = c(length(e[[col.order[[1]]]]), length(col.order))) { environment() } #Row subset function constructor. #Arguments are colnames, result is accessor function row_subsetter <- macro(function(...) { col.order <- structure(c(...), names=c(...)) #macro looks up entire closure qe(function(e, rows) { #create new env with promises to subset each column new.env <- (function(`.(col.order)`=..(missing_value(length(col.order)))) environment())( ..(qqply(e$`.(col)`[rows])(col=col.order))) parent.env(new.env) <- parent.env(e) attr(new.env, "accessory") <- accessory_env( new.env, col.order, #column names do not change, so can reuse existing accessors attr(e, "accessory")$row_subset, attr(e , "accessory")$col_subset) class(new.env) <- c("lazy.frame", "environment") new.env }) }) #Column subset function constructor. #arguments are colnames, result is accessor function col_subsetter <- macro(function(...) { col.order <- structure(c(...), names=c(...)) args <- qqply(`.(col)`=e$`.(col)`)(col=col.order) col_subset_inner(col.order, args) }) col_subset_inner <- function(col.order, args) { function(e, cols) { new.cols <- col.order[cols] new.env <- do.call(env.list, args[new.cols]) parent.env(new.env) <- parent.env(e) attr(new.env, "accessory") <- accessory_env( new.env, new.cols, col_subset = col_subset_inner(new.cols, args[new.cols])) class(new.env) <- c("lazy.frame", "environment") new.env } } env.list <- macro(function(...) { args <- list(...) #print(names(args)) #verify caching works qq((function(`.(names(args))`=..(missing_value(length(args)))) environment())( ..(args))) }) `[.lazy.frame` <- function(x, i, j, drop=TRUE) { if (nargs() == 2) { j <- i i <- missing_value() drop <- FALSE } if (!missing(j)) { if (length(j) == 1 && drop && !is.logical(j)) { if (missing(i)) i <- TRUE return(x[[j]][i]) } else { x <- attr(x, "accessory")$col_subset(x, j) } } if (!missing(i)) { x <- attr(x, "accessory")$row_subset(x, i) } x } `[[.lazy.frame` <- function(x, i) { get(attr(x, "accessory")$col.order[[i]], x) } dim.lazy.frame <- function(df) { attr(df, "accessory")$dim } names.lazy.frame <- function(df) { attr(df, "accessory")$col.order } as.list.lazy.frame <- function(df) mget(attr(df, "accessory")$col.order, df) as.data.frame.lazy.frame <- function(df) plyr::quickdf(as.list(df)) ############################################################