Skip to content

Instantly share code, notes, and snippets.

@daroczig
Created October 7, 2019 06:12
Show Gist options
  • Save daroczig/ef858d11b159f390b35fbbf8300b378d to your computer and use it in GitHub Desktop.
Save daroczig/ef858d11b159f390b35fbbf8300b378d to your computer and use it in GitHub Desktop.

Revisions

  1. daroczig created this gist Oct 7, 2019.
    86 changes: 86 additions & 0 deletions create-local-MRAN-snapshot.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,86 @@
    #!/usr/bin/env r

    library(miniCRAN)
    library(data.table)
    library(pander)
    library(logger)
    library(magrittr)
    library(docopt)

    ## #############################################################################
    ## config

    ## Command-line interface, parsed by docopt. The usage text is assembled line
    ## by line so that the indentation of this source file never becomes part of
    ## the text docopt has to parse.
    ##
    ## docopt conventions relied on below:
    ## - a blank line ends the usage pattern
    ## - every option description starts with (optional spaces and) a dash
    ## - an option and its description are separated by at least TWO spaces;
    ##   with a single space the description words are read as part of the
    ##   option spec, so `[default: ...]` is ignored and a flag such as --force
    ##   is treated as expecting an argument
    cli_doc <- paste(
      "Make local MRAN mirror for a given date in the current working directory's {date} folder",
      "",
      "Usage: create-local-MRAN-snapshot.R [options] <packages>...",
      "",
      "Options:",
      "  --date=d  YYYY-MM-DD pointer to the MRAN snapshot date [default: 2019-01-01]",
      "  --force   re-download everything even if the {date} folder already exists",
      sep = "\n")

    ## parsed arguments; the rest of the script reads opts$date, opts$force and
    ## opts$packages
    opts <- docopt(cli_doc)

    ## #############################################################################
    ## prep folder

    ## Snapshot date requested on the command line; it is also used as the name of
    ## the local repository folder, relative to the current working directory.
    SNAPSHOT_DATE <- opts$date

    ## The value ends up in a filesystem path and, with --force, in a recursive
    ## unlink(), so refuse anything that is not exactly one valid YYYY-MM-DD date
    ## (e.g. '..' or an absolute path). The regex is needed on top of as.Date()
    ## because date parsing tolerates trailing characters.
    if (length(SNAPSHOT_DATE) != 1L ||
        !grepl('^[0-9]{4}-[0-9]{2}-[0-9]{2}$', SNAPSHOT_DATE) ||
        is.na(as.Date(SNAPSHOT_DATE, format = '%Y-%m-%d'))) {
      stop('--date must be a single valid date in YYYY-MM-DD format', call. = FALSE)
    }

    ## remote repository to mirror from
    SNAPSHOT_URL <- file.path('https://mran.microsoft.com/snapshot', SNAPSHOT_DATE)
    log_info('MRAN mirror: ', SNAPSHOT_URL)

    ## --force: drop the local snapshot folder so everything gets downloaded again;
    ## isTRUE() keeps a missing/NULL flag from breaking the condition
    if (isTRUE(opts$force)) {
      log_info(
        'Killing local snapshot {SNAPSHOT_DATE} ',
        'with {length(list.files(SNAPSHOT_DATE, recursive = TRUE))} files')
      unlink(SNAPSHOT_DATE, recursive = TRUE)
    }

    ## #############################################################################
    ## check on prior runs

    ## make sure the snapshot folder exists; warnings are silenced because the
    ## folder is expected to be there already after a previous run
    dir.create(SNAPSHOT_DATE, showWarnings = FALSE)

    ## inventory of everything currently stored in the local repo: one row per
    ## file with its path, base name and the file.info() attributes.
    ## file.info() is vectorised, so a single call replaces the per-file
    ## lapply() + rbindlist() + cbind() combination.
    files <- list.files(SNAPSHOT_DATE, recursive = TRUE, full.names = TRUE)
    files <- data.table(path = files, file = basename(files), file.info(files))

    ## package name = file name up to the first underscore
    ## (e.g. 'foo_1.0.tar.gz' -> 'foo'); names without an underscore are kept as is
    already_downloaded_packages <- sub('^([^_]*).*$', '\\1', files$file)
    ## drop the repository index files (PACKAGES, PACKAGES.gz, ...)
    already_downloaded_packages <- already_downloaded_packages[!grepl('^PACKAGES', already_downloaded_packages)]

    log_info('{length(already_downloaded_packages)} already downloaded R packages')
    pandoc.list(already_downloaded_packages)

    ## #############################################################################
    ## list new packages

    ## get the list of explicitly required packages
    log_info('{length(opts$packages)} R packages listed explicitly:')
    pandoc.list(opts$packages)

    ## add all required R packages as per dependency graph
    ## (suggested packages are left out on purpose)
    packages <- pkgDep(opts$packages, repos = SNAPSHOT_URL, type = 'source', suggests = FALSE)
    log_info('{length(packages)} R packages identified after looking up dependencies:')

    ## report the versions found in the snapshot; the type is passed explicitly so
    ## the lookup uses the same source package index as pkgDep() above instead of
    ## falling back to getOption("pkgType"), which may differ between platforms
    snapshot_index <- data.table(available.packages(repos = SNAPSHOT_URL, type = 'source'))
    pander(
      snapshot_index[Package %in% packages, .(Package, Version)],
      style = 'simple', justify = 'right')

    ## only fetch what is not in the local repo yet
    packages <- setdiff(packages, already_downloaded_packages)
    log_info('{length(packages)} R packages to be downloaded and added to the local repo:')
    pandoc.list(packages)

    ## nothing new requested: finish with a success exit code
    if (length(packages) == 0) {
      log_info('Nothing to download now, exiting')
      quit(save = 'no', status = 0L)
    }

    ## #############################################################################
    ## download new packages

    if (length(already_downloaded_packages) > 0) {
      ## repo already exists, we just need to add new stuff;
      ## deps = FALSE as `packages` was already expanded with its dependencies
      addPackage(packages, path = SNAPSHOT_DATE, repos = SNAPSHOT_URL, deps = FALSE, type = 'source')
    } else {
      ## it's a new repo
      makeRepo(packages, path = SNAPSHOT_DATE, repos = SNAPSHOT_URL, type = 'source')
    }

    ## summary of the local repo after the download: one row per file.
    ## file.info() is vectorised, so a single call replaces the per-file
    ## lapply() + rbindlist() + cbind() combination.
    files <- list.files(SNAPSHOT_DATE, recursive = TRUE, full.names = TRUE)
    files <- data.table(path = files, file = basename(files), file.info(files))
    log_info('Overall {nrow(files)} files downloaded so far:')
    pander(files[, .(file, size)], style = 'simple', justify = 'right')