# quick look sheet for comparing plyr::rbind.fill --> dplyr::bind_rows()
# NB: I am only interested in migrating rbind.fill-->bind_rows(), so
#   features of bind_rows() absent from rbind.fill(), e.g. .id=, are not examined.
#
# How to read this sheet:
#   - every live all.equal() call compares the two implementations on the same
#     inputs; all.equal() gives TRUE or a description of the differences.
#   - lines starting with '###' are calls left commented out, preceded by the
#     author's note on why.

# Short aliases so each comparison reads as the same call with two back ends.
rbind.fill = plyr::rbind.fill
bind_rows = dplyr::bind_rows

DF1 = data.frame(a = 1, b = 2)
DF2 = data.frame(a = 1, b = 2)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

# API equivalence
all.equal(rbind.fill(list(DF1, DF2)), bind_rows(list(DF1, DF2)))
all.equal(
  do.call(rbind.fill, list(DF1, DF2)),
  do.call(bind_rows, list(DF1, DF2))
)
all.equal(
  do.call(rbind.fill, list(list(DF1, DF2))),
  do.call(bind_rows, list(list(DF1, DF2)))
)

# column order mismatch
all.equal(rbind.fill(DF1, rev(DF2)), bind_rows(DF1, rev(DF2)))

# column name mismatch
DF2 = data.frame(a = 1, c = 3)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## name and order mismatch
all.equal(rbind.fill(DF1, rev(DF2)), bind_rows(DF1, rev(DF2)))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))

# no columns match
DF2 = data.frame(c = 3, d = 4)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))

# recursive columns
## data.frame
DF1 = data.frame(a = 1)
DF1$DF = data.frame(b = 2, c = 3)
DF2 = data.frame(a = 4)
DF2$DF = data.frame(b = 5, c = 6)
### plyr DOES NOT SUPPORT data.frame COLUMNS
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## matrix, matching column names
DF1$DF <- as.matrix(DF1$DF)
DF2$DF <- as.matrix(DF2$DF)
### dplyr drops column names!
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## matrix, mismatch column names
colnames(DF2$DF) <- c("d", "e")
### plyr ignores mismatch, dplyr drops column names
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## matrices with row names
colnames(DF1$DF) <- NULL
colnames(DF2$DF) <- NULL
rownames(DF1$DF) <- 'x'
rownames(DF2$DF) <- 'y'
### plyr ignores row names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## list, unnamed
DF1 = data.frame(a = 1)
DF1$l = list(2)
DF2 = data.frame(a = 3)
DF2$l = list(4)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## list, named
DF1 = data.frame(a = 1)
DF1$l = list(b = 2)
DF2 = data.frame(a = 3)
DF2$l = list(b = 4)
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## list, mixed naming
names(DF2$l) = NULL
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## when fill is needed
DF2 = data.frame(a = 3)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))

# empty inputs
DF1 = data.frame(a = 1, b = 2)
DF2 = data.frame()
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
DF2 = data.frame(a = double(), b = double())
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## 0 rows, new columns
DF2 = data.frame(c = integer(), d = integer())
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## mismatch types
DF2 = data.frame(a = character(), b = character())
### dplyr DOES NOT COMBINE EVEN 0-ROW MISMATCHES [SEE BELOW]
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

# column type mismatch
# One single-row frame per column type; each '## -> <type>' subsection pairs
# the new frame with the ones defined before it, in both argument orders.
## -> logical
# NOTE(review): DF_r's column is named 'r' while every other frame in this
# section uses 'v', so DF_r never shares a column with them -- confirm whether
# 'v' was intended.
DF_r = data.frame(r = as.raw(0))
DF_l = data.frame(v = TRUE)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_l), bind_rows(DF_r, DF_l))
### all.equal(rbind.fill(DF_l, DF_r), bind_rows(DF_l, DF_r))
## -> integer
DF_i = data.frame(v = 0L)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_i), bind_rows(DF_r, DF_i))
all.equal(rbind.fill(DF_l, DF_i), bind_rows(DF_l, DF_i))
### all.equal(rbind.fill(DF_i, DF_r), bind_rows(DF_i, DF_r))
all.equal(rbind.fill(DF_i, DF_l), bind_rows(DF_i, DF_l))
## -> double
DF_d = data.frame(v = 0.0)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_d), bind_rows(DF_r, DF_d))
all.equal(rbind.fill(DF_l, DF_d), bind_rows(DF_l, DF_d))
all.equal(rbind.fill(DF_i, DF_d), bind_rows(DF_i, DF_d))
### all.equal(rbind.fill(DF_d, DF_r), bind_rows(DF_d, DF_r))
all.equal(rbind.fill(DF_d, DF_l), bind_rows(DF_d, DF_l))
all.equal(rbind.fill(DF_d, DF_i), bind_rows(DF_d, DF_i))
## -> complex
DF_c = data.frame(v = 0.0 + 1.0i)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_c), bind_rows(DF_r, DF_c))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_l, DF_c), bind_rows(DF_l, DF_c))
all.equal(rbind.fill(DF_i, DF_c), bind_rows(DF_i, DF_c))
all.equal(rbind.fill(DF_d, DF_c), bind_rows(DF_d, DF_c))
### all.equal(rbind.fill(DF_c, DF_r), bind_rows(DF_c, DF_r))
### all.equal(rbind.fill(DF_c, DF_l), bind_rows(DF_c, DF_l))
all.equal(rbind.fill(DF_c, DF_i), bind_rows(DF_c, DF_i))
all.equal(rbind.fill(DF_c, DF_d), bind_rows(DF_c, DF_d))
## -> character
DF_s = data.frame(v = 'a')
### dplyr::bind_rows() blocks -> character conversion!
### all.equal(rbind.fill(DF_r, DF_s), bind_rows(DF_r, DF_s))
### all.equal(rbind.fill(DF_l, DF_s), bind_rows(DF_l, DF_s))
### all.equal(rbind.fill(DF_i, DF_s), bind_rows(DF_i, DF_s))
### all.equal(rbind.fill(DF_d, DF_s), bind_rows(DF_d, DF_s))
### all.equal(rbind.fill(DF_c, DF_s), bind_rows(DF_c, DF_s))
### all.equal(rbind.fill(DF_s, DF_r), bind_rows(DF_s, DF_r))
### all.equal(rbind.fill(DF_s, DF_l), bind_rows(DF_s, DF_l))
### all.equal(rbind.fill(DF_s, DF_i), bind_rows(DF_s, DF_i))
### all.equal(rbind.fill(DF_s, DF_d), bind_rows(DF_s, DF_d))
### all.equal(rbind.fill(DF_s, DF_c), bind_rows(DF_s, DF_c))
## -> list
# Start from a one-row frame, then overwrite 'v' with a list column whose
# single element is the integer vector 1:2.
DF_t = data.frame(v = 1L)
DF_t$v = list(1:2)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_t), bind_rows(DF_r, DF_t))
### dplyr::bind_rows() blocks -> list conversion!
### all.equal(rbind.fill(DF_l, DF_t), bind_rows(DF_l, DF_t))
### all.equal(rbind.fill(DF_i, DF_t), bind_rows(DF_i, DF_t))
### all.equal(rbind.fill(DF_d, DF_t), bind_rows(DF_d, DF_t))
### all.equal(rbind.fill(DF_c, DF_t), bind_rows(DF_c, DF_t))
### all.equal(rbind.fill(DF_s, DF_t), bind_rows(DF_s, DF_t))
### all.equal(rbind.fill(DF_t, DF_r), bind_rows(DF_t, DF_r))
### all.equal(rbind.fill(DF_t, DF_l), bind_rows(DF_t, DF_l))
### all.equal(rbind.fill(DF_t, DF_i), bind_rows(DF_t, DF_i))
### all.equal(rbind.fill(DF_t, DF_d), bind_rows(DF_t, DF_d))
### all.equal(rbind.fill(DF_t, DF_c), bind_rows(DF_t, DF_c))
### all.equal(rbind.fill(DF_t, DF_s), bind_rows(DF_t, DF_s))

# column type mismatch, one frame is missing
## typed-NA fixtures: one single-row frame per atomic type, holding only NA
DF_l_NA = data.frame(v = NA)
DF_i_NA = data.frame(v = NA_integer_)
DF_d_NA = data.frame(v = NA_real_)
DF_c_NA = data.frame(v = NA_complex_)
DF_s_NA = data.frame(v = NA_character_)
## -> logical
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_l_NA, DF_r), bind_rows(DF_l_NA, DF_r))
## -> integer
all.equal(rbind.fill(DF_l_NA, DF_i), bind_rows(DF_l_NA, DF_i))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_i_NA, DF_r), bind_rows(DF_i_NA, DF_r))
all.equal(rbind.fill(DF_i_NA, DF_l), bind_rows(DF_i_NA, DF_l))
## -> double
all.equal(rbind.fill(DF_l_NA, DF_d), bind_rows(DF_l_NA, DF_d))
all.equal(rbind.fill(DF_i_NA, DF_d), bind_rows(DF_i_NA, DF_d))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_d_NA, DF_r), bind_rows(DF_d_NA, DF_r))
all.equal(rbind.fill(DF_d_NA, DF_l), bind_rows(DF_d_NA, DF_l))
all.equal(rbind.fill(DF_d_NA, DF_i), bind_rows(DF_d_NA, DF_i))
## -> complex
### SEE https://github.com/tidyverse/dplyr/issues/7685
# NOTE(review): the next call carries no '###' prefix in the source, so it is
# kept live here -- confirm it was not meant to be commented out like the
# DF_c_NA/DF_l call further down.
all.equal(rbind.fill(DF_l_NA, DF_c), bind_rows(DF_l_NA, DF_c))
all.equal(rbind.fill(DF_i_NA, DF_c), bind_rows(DF_i_NA, DF_c))
all.equal(rbind.fill(DF_d_NA, DF_c), bind_rows(DF_d_NA, DF_c))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_c_NA, DF_r), bind_rows(DF_c_NA, DF_r))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_c_NA, DF_l), bind_rows(DF_c_NA, DF_l))
all.equal(rbind.fill(DF_c_NA, DF_i), bind_rows(DF_c_NA, DF_i))
all.equal(rbind.fill(DF_c_NA, DF_d), bind_rows(DF_c_NA, DF_d))
## -> character
### dplyr::bind_rows() blocks -> character conversion, mostly!
all.equal(rbind.fill(DF_l_NA, DF_s), bind_rows(DF_l_NA, DF_s))
### all.equal(rbind.fill(DF_i_NA, DF_s), bind_rows(DF_i_NA, DF_s))
### all.equal(rbind.fill(DF_d_NA, DF_s), bind_rows(DF_d_NA, DF_s))
### all.equal(rbind.fill(DF_c_NA, DF_s), bind_rows(DF_c_NA, DF_s))
### all.equal(rbind.fill(DF_s_NA, DF_r), bind_rows(DF_s_NA, DF_r))
### all.equal(rbind.fill(DF_s_NA, DF_l), bind_rows(DF_s_NA, DF_l))
### all.equal(rbind.fill(DF_s_NA, DF_i), bind_rows(DF_s_NA, DF_i))
### all.equal(rbind.fill(DF_s_NA, DF_d), bind_rows(DF_s_NA, DF_d))
### all.equal(rbind.fill(DF_s_NA, DF_c), bind_rows(DF_s_NA, DF_c))

# row names handling
DF1 = data.frame(a = 1, row.names = 'a')
DF2 = data.frame(a = 2, row.names = 'b')
### plyr DROPS STRING ROW NAMES
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both dplyr & plyr drop integer row names
rownames(DF1) = 2L
rownames(DF2) = 3L
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

# factors
# Fixtures: factor frames differing in value, level set and level order,
# plus character, integer and unrelated-column frames to combine them with.
DF_f1 = data.frame(v = factor('a'))
DF_f2 = data.frame(v = factor('a', levels = c('a', 'b')))
DF_f3 = data.frame(v = factor(NA, levels = 'a'))
DF_f4 = data.frame(v = factor(NA, levels = 'd'))
DF_f5 = data.frame(v = factor('a', levels = c('b', 'a')))
DF_f6 = data.frame(v = factor('1'))
DF_f7 = data.frame(v = factor('b'))
DF_f8 = data.frame(v = factor('a', levels = c('a', 'c')))
DF_c1 = data.frame(v = 'a')
DF_c2 = data.frame(v = 'b')
# DF_c3 holds the literal string 'NA'; DF_c4 holds a missing character value.
DF_c3 = data.frame(v = 'NA')
DF_c4 = data.frame(v = NA_character_)
DF_x = data.frame(x = 1.0)
DF_i1 = data.frame(v = 1L)
DF_i2 = data.frame(v = 100L)
DF_i3 = data.frame(v = NA_integer_)

all.equal(rbind.fill(DF_f1, DF_f2), bind_rows(DF_f1, DF_f2))
all.equal(rbind.fill(DF_f1, DF_c1), bind_rows(DF_f1, DF_c1))
all.equal(rbind.fill(DF_f1, DF_c2), bind_rows(DF_f1, DF_c2))
all.equal(rbind.fill(DF_f1, DF_f3), bind_rows(DF_f1, DF_f3))
all.equal(rbind.fill(DF_f1, DF_f4), bind_rows(DF_f1, DF_f4))
all.equal(rbind.fill(DF_f1, DF_x), bind_rows(DF_f1, DF_x))
all.equal(rbind.fill(DF_f1, DF_c3), bind_rows(DF_f1, DF_c3))
# Remaining factor comparisons; '###' lines are calls left commented out,
# each preceded by the author's note on why.
all.equal(rbind.fill(DF_f3, DF_c3), bind_rows(DF_f3, DF_c3))
## surprisingly...
all.equal(rbind.fill(DF_f2, DF_f5), bind_rows(DF_f2, DF_f5))
### dplyr REFUSES TO COERCE integer --> factor/character
### all.equal(rbind.fill(DF_f1, DF_i1), bind_rows(DF_f1, DF_i1))
### all.equal(rbind.fill(DF_f1, DF_i2), bind_rows(DF_f1, DF_i2))
### EVEN IF THE INTEGER IS A FACTOR LEVEL
### all.equal(rbind.fill(DF_f6, DF_i1), bind_rows(DF_f6, DF_i1))
### EVEN IF THE INTEGER IS MISSING
### all.equal(rbind.fill(DF_f1, DF_i3), bind_rows(DF_f1, DF_i3))
all.equal(rbind.fill(DF_f1, DF_f7), bind_rows(DF_f1, DF_f7))
all.equal(rbind.fill(DF_f2, DF_f8), bind_rows(DF_f2, DF_f8))
all.equal(rbind.fill(DF_f1, DF_c4), bind_rows(DF_f1, DF_c4))
all.equal(rbind.fill(DF_f3, DF_c4), bind_rows(DF_f3, DF_c4))