Skip to content

Instantly share code, notes, and snippets.

@matt-dray
Last active June 21, 2024 15:43
Show Gist options
  • Save matt-dray/8c22f81a5dc4205a1da0d8975a6c42b9 to your computer and use it in GitHub Desktop.
Save matt-dray/8c22f81a5dc4205a1da0d8975a6c42b9 to your computer and use it in GitHub Desktop.

Revisions

  1. matt-dray revised this gist Jun 21, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion compare-sheet-names.R
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ compare_listed_vectors <- function(l) {
    # 3. Compare vectors ----

    # List of sheet names, elements named after the file
    l <- purrr::map(files, readxl::excel_sheets) |> setNames(basename(files))
    l <- lapply(files, readxl::excel_sheets) |> setNames(basename(files))

    # Output a data.frame with listcol of differences
    compare_listed_vectors(l)
  2. matt-dray revised this gist Jun 21, 2024. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions compare-sheet-names.R
    Original file line number Diff line number Diff line change
    @@ -8,13 +8,13 @@ make_temp_xlsx <- function(sheet_names) {
    openxlsx2::wb_save(wb, temp)
    }

    # Make three spreadsheets, some sheet names vary
    # Make multiple spreadsheets, some sheet names vary
    purrr::walk(
    list(LETTERS[1:3], LETTERS[1:3], LETTERS[1:4], LETTERS[1:5]),
    make_temp_xlsx
    )

    # Get the full path to these temporary files
    # Get the full paths to these temporary files
    files <- list.files(tempdir(), ".xlsx", full.names = TRUE)

    # 2. Compare all pairs of vectors, return differences ----
  3. matt-dray created this gist Jun 21, 2024.
    49 changes: 49 additions & 0 deletions compare-sheet-names.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    # 1. Generate temporary Excel files ----

    # Make a temporary spreadsheet with named sheets
    make_temp_xlsx <- function(sheet_names) {
    wb <- openxlsx2::wb_workbook()
    for (sheet in sheet_names) wb <- wb |> openxlsx2::wb_add_worksheet(sheet)
    temp <- openxlsx::temp_xlsx()
    openxlsx2::wb_save(wb, temp)
    }

    # Make three spreadsheets, some sheet names vary
    purrr::walk(
    list(LETTERS[1:3], LETTERS[1:3], LETTERS[1:4], LETTERS[1:5]),
    make_temp_xlsx
    )

    # Get the full path to these temporary files
    files <- list.files(tempdir(), ".xlsx", full.names = TRUE)

    # 2. Compare all pairs of vectors, return differences ----

    # Compare between all vector pairings, identify differences
    compare_listed_vectors <- function(l) {

    pairs <- expand.grid(names(l), names(l)) # assuming l is named
    pairs <- pairs[with(pairs, which(Var1 != Var2)), ] # don't compare self

    for (i in seq(nrow(pairs))) {
    vec1 <- pairs[i, 1]
    vec2 <- pairs[i, 2]
    diffs <- setdiff(l[[vec1]], l[[vec2]])
    diffs <- if (length(diffs) == 0) list(NULL) else list(diffs)
    pairs[i, "diffs"] <- list(diffs)
    }

    setNames(pairs, c("vec_a", "vec_b", "diffs")) # a data.frame

    }

    # 3. Compare vectors ----

    # List of sheet names, elements named after the file
    l <- purrr::map(files, readxl::excel_sheets) |> setNames(basename(files))

    # Output a data.frame with listcol of differences
    compare_listed_vectors(l)

    # Clean up temp files
    unlink(files)