## Package bootstrap script.
##
## Installs a personal selection of R packages from CRAN and GitHub:
##   1. devtools/tidyverse and a few RStudio addins and templates,
##   2. the union of two CRAN package lists (`x` and `y`),
##   3. a short list of GitHub-only packages (`z`).
## Duplicated names inside `x` and `y` are harmless because union() drops them.

## Curated stuff:
## * MetaCran: http://www.r-pkg.org/
## * List of Packages gathered by Garret G.: https://github.com/rstudio/RStartHere
## * List of popular packages https://awesome-r.com/
## * List of DataScience R tutorials https://github.com/ujjwalkarn/DataScienceR
## * List of machine learning tutorials by subject: https://ujjwalkarn.github.io/Machine-Learning-Tutorials/

## Reproducible package management for R:
install.packages("devtools")
install.packages("tidyverse")
library(devtools)
#devtools::install_github("rstudio/packrat")
# More info @ http://rstudio.github.io/packrat/

## great RStudio Addins
install.packages("radiant", repos = "http://vnijs.github.io/radiant_miniCRAN/", type = "binary")
install.packages("addinslist") # an AddIn that serves as AddIn Browser and Manager
install.packages("ggThemeAssist") # ggplot2 theme assistance
devtools::install_github("jennybc/jadd") # Convenience function for developing and debugging functions
devtools::install_github("tjmahr/WrapRmd") # a tool to conveniently wrap long RMD texts with R code
devtools::install_github("MangoTheCat/tidyshiny") # Interactively manipulate data with the tidyr package using this handy shiny gadget.

## great RStudio Markdown Templates
install.packages("rmdformats") # great templates for RMD files

## great bioconductor packages
#EBImage - Image Processing: source("http://bioconductor.org/biocLite.R"), biocLite("EBImage")

## cran packages recommended by a blogger
x <- c(
  ## Data Import and Manipulation
  "broom", # Convert statistical analysis objects from R into tidy format
  "XML", # tools for parsing and generating XML
  "foreign", # functions for reading and writing data stored by Minitab, S, SAS, SPSS...
  "lubridate", # makes it easier to work with dates and times by providing functions to identify and parse date-time data
  "stringr", # makes it easier to work with strings
  "sqldf", # for running SQL statements on R data frames, optimized for convenience
  "RCurl", # general network (HTTP/HTTPS/FTP/...) client interface for R
  "rjson", # converts R object into JSON objects and vice-versa
  "xlsx", # provides R functions to read/write/format Excel 2007 and Excel 97/2000/XP/2003 file formats
  "tidyr", # an evolution of reshape2. It's designed specifically for data tidying (not general reshaping or aggregating) and works well with dplyr data pipelines
  "dplyr", # a fast, consistent tool for working with data frame like objects, both in memory and out of memory
  "httr", # provides useful tools for working with HTTP
  "data.table", # Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all

  ## Exploratory Data Analysis and Visualization
  "ggplot2", # an implementation of the Grammar of Graphics
  "RColorBrewer", # provides palettes for drawing nice maps shaded according to a variable
  "ellipse", # functions for drawing ellipses and ellipse-like confidence regions
  "animation", # a gallery of animations in statistics and utilities to create animations
  "shiny" # elegant and powerful web framework for building interactive web applications using R
)

## my own installed packages on my system
y <- c(
  "arm",

  #### Graphics devices
  "Cairo", "cairoDevice",
  "svglite", # fast SVG graphics device - hadleyverse
  "car", # mostly used: car::recode
  "caTools",

  ## testing, asserting and validating tools
  "validate", # awesome package for rule based checking of data, very sophisticated and mature
  "assertr", # assert tool especially optimized for data checking, plays well with dplyr
  "ensurer", # Ensure Values at Runtime, plays well with dplyr
  "testthat", # r unit test framework "hadleyverse", could be also great for data checking
  "assertthat", # r unit test framework "hadleyverse", could be also great for data checking

  ## knitr suite
  "knitr", # literate programming for R
  #"printr" # yet to be released to CRAN; companion package to knitr by its creator
  "pander", # An R Pandoc Writer
  "sparkTable", # create sparklines, sparkhistograms, sparktables for shiny and knitr docs
  "formattable", # Awesome package to conditionally format tables by renkun-ken

  ## Templates for RStudio/knitr RMDMarkdown format
  "rmdformats", # html_clean and html_docco template

  ## Table generation packages
  "huxtable", ### my new favorite - htmlTable++
  "tangram", ### grammar of tables - from hmisc creator
  "desctable", ### my new favorite - DescTools/tableone for the tidyverse
  "tableone", ### great for Table 1, works well for Propensity Score analytics
  "moonBook", # great for Table1 creation for medical papers
  "ztable", # great for table creation for medical papers
  "compareGroups", ### great for Table 1
  "htmlTable", ### great for creating tables like in top medical journals, does not summarize only helps with formatting
  "DescTools", ### perfect for descriptive stats, great tools for plotting and descriptive stats
  "stargazer", # great table output for model objects
  "pixiedust", # a grammar for formatting tables.
  "tables", # has an interesting interface for producing summary statistics. It looks complex but powerful.
  "formattable",
  "ompr", # DSL for MODEL MIXED-INTEGER LINEAR PROGRAMS
  "ompr.ROI", # DSL for MODEL MIXED-INTEGER LINEAR PROGRAMS

  ## Image processing
  "imager", # imager: an R package for image processing

  ## Shiny and Shiny addons -- check out shiny.rstudio.com
  "shiny", # essential - R web framework
  "flexdashboard", # Easy interactive dashboards for R - works well with htmlwidgets
  "shinyAce", # code editor
  "shinyjs", # javascript in shiny made easy
  "shinythemes", # a collection of Shiny themes for the theme argument of bootstrapPage, fluidPage, navbarPage, or fixedPage
  "shinydashboard", # a dashboard framework, much more options than flexdashboard, but also more features

  ## htmlwidgets framework for JS/D3 widgets in Shiny and knitr
  "htmlwidgets", # htmlwidgets
  "trelliscopejs", # great for exploratory analysis
  "DiagrammeR",
  "threejs",
  "DT",
  "d3heatmap",
  "networkD3", # great for static and interactive network graphics
  "visNetwork", # great for static and interactive network graphics
  "dygraphs",
  "leaflet",
  "rgl", # 3D plotting WebGL widget
  #https://github.com/timelyportfolio/timelineR # timelines in R
  "mindr", # mindmaps in markdown!

  ##
  "corrr", # tidy working with correlations

  ## data structures
  "data.tree", # General Purpose Hierarchical Data Structure

  ## Graph Database utilities
  "tidygraph", # verbs and a tidy API for working with graphs
  "igraph", # igraph is a collection of network analysis and viz tools
  "RNeo4j", # Neo4j R driver
  # OrientDB - ???
  "networkD3", # htmlwidget: great for static and interactive network graphics
  "visNetwork", # htmlwidget: great for static and interactive network graphics
  "ggraph", "ggnetwork", # ggplot2 extensions for graphs

  ## clustering
  "cluster", ## main functions: agnes and daisy
  "fpc", ## check validity of clusters: clusterboot function; kmeansruns
  "heatmaply", ## sophisticated heatmap plotting

  ## data reading and munging packages - tidyverse
  "tidyverse",
  "janitor", # great for cleaning data
  "dplyr", # DSL for data work - hadleyverse
  "wrapr", # great for NSE with dplyr, simplifies NSE programming quite a lot!
  "tidyr", # great for data munging - hadleyverse
  "purrr", # functional programming for lists - hadleyverse
  "broom", # Convert statistical analysis objects from R into tidy format
  "forcats", # working with factors
  "lubridate", # working with dates
  "magrittr", # %>%
  "jsonlite", # jsonlite: A Robust, High Performance JSON Parser and Generator for R
  "tidyjson", # a grammar for reading in deterministically json data into a data frame
  "yaml", # Methods to convert R data to YAML and back; works well with data.tree package
  "readr", # great for data IO of text files - hadleyverse
  "readxl", # for data IO of excel files, still buggy and unreliable - hadleyverse
  "janitor", # must have when reading in excel files with the readxl package
  "openxlsx", # great for data IO of excel files
  "XLConnect", # for reading in Excel - at the moment the most reliable. Backbone: Java
  "rvest", # convenient webscraping - hadleyverse
  "haven", # great for data IO of SPSS, SAS, STATA im/export - hadleyverse
  "import", # from the creator of magrittr: great for importing
  "dummies", # for easy creation of dummy variables
  "feather", # high speed data reading and writing format
  "doMC", # for multi-core processing

  # cat data analysis
  "vcd", # great for categorical data analysis
  "epibasix", # epi helper package
  "epicalc", # epi helper package (print_logistic), not anymore maintained
  "epiR", # epi helper package
  "epitools", # epi helper package

  # exact statistics
  "exact2x2", # exact methods
  "exactci", # exact methods
  "extracat",
  "ez",
  "gdata",

  ## ggplot2 and general plotting packages
  # A web page listing all ggplot2 extensions: http://www.ggplot2-exts.org/
  "ggplot2", # see packages "cowplot" (plot_grid)
  "ggrepel", # extension: for direct labeling
  "ggforce", # extension: contextual zoom -> facet_zoom ; sinaplot -> geom_sina
  "ggbeeswarm", # extension: awesome beeswarm plots
  "gganimate", "tweenr", # extension: easy animations of ggplot2 plots
  "ggloop", # extension: Create 'ggplot2' Plots in a Loop
  "ggthemes", "ggthemr", # great themes for ggplot2
  "ggsci", # plotting scales and palettes of scientific journals
  "ggTimeSeries", # great for time series or time related data! Innovative viz!
  "ggpubr", # easy 'ggplot2' Based Publication Ready Plots
  "viridis", # academic and aesthetic proven color palette

  # great plotting packages for special purposes
  "tabplot", # Tableplot, a Visualization of Large Datasets
  "plotluck", # fantastic and crazy simple way of beautiful automatic plotting for exploratory purposes with formula interface
  "survminer", # ggplot2 based beautiful and 'ready-to-publish' survival curves with tables
  "rms", # survplot and estimates
  "survMisc", # also survival plotting
  "trelliscopejs", # great for exploratory analysis
  "GGally", # GGally - some special plots using ggplot2
  "likert", # great for visualizing likert scales
  "dotwhisker", # Better than coefplot, Visualizing regression coefficients - very nice!, plays well with dplyr and broom
  "visreg", # great for visualizing a regression result
  "corrplot", "corrgram", # visualizing correlation matrices
  "forestplot", # Advanced Forest Plot Using 'grid' Graphics
  "venneuler", # creating Venn and Euler diagrams
  "dendextend", # great package for various viz of dendrograms
  "plotROC", # great for plotting ROC curves
  "DiagrammeR", # excellent Diagram lib
  "ggfortify", # Define fortify and autoplot functions to allow ggplot2 to handle some popular R packages.
  "cowplot", # great for multi-panel figures with ggplot2 - plot_grid; save_ggplot3
  "ggdendro",
  "ggRandomForests",
  "ggmcmc", # has gg_pairs function
  "plotly", # good plots
  "ggvis", # next gen ggplot2, early dev
  "plotly", # plotly API - offline dynamic viz gen.
  "timeline", # timeline: Timelines for a Grammar of Graphics
  "sjPlot", # package for plotting of forest plots and interaction term of GLMs
  "moonBook", # for plotting forests plots and survival curves
  "googleVis",
  "latticeExtra",
  "Gmisc",
  "gmodels",
  "granova", "granovaGG",
  "gridExtra",
  "gtable",
  "gtools",
  "Hmisc",
  "scatterplot3d", # great 3d plotting library
  "plot3D", #
  "rgl", # 3D plotting including the WebGL htmlwidget
  "simmer", # Discrete event simulation with nice viz; plays well with dplyr

  # Meta research - Meta Analysis - Open Science
  "meta",
  "MAVIS", # a shiny app for Meta-Analysis
  "metagear", # Research Synthesis Tools for Systematic Reviews and Meta-Analysis
  "metafor", # comprehensive collection of functions for conducting meta-analyses in R.
  "meta", # meta analysis package
  "compute.es", # Compute Effect Sizes
  "MAVIS", # MAVIS: Meta Analysis via Shiny
  "minerva", # implementation of MIC

  # Survival Analysis
  "survMisc", # great survival misc functions
  "installr", # great tool for installing R and other necessary tools
  "lme4", # linear mixed effects
  "multcomp", # handling multiple comparison
  "lubridate", # easy working with dates - hadleyverse
  "MASS",

  # Quality Control
  "MethComp",
  "qcc",

  # Propensity Score tools
  # shiny::runGitHub("LaurenSamuels/VisualPruner")
  "optmatch",
  "twang",
  "Matching", "MatchIt",
  "PSAboot", "PSAgraphics",
  "multilevelPSA",
  "TriMatch",

  # Single/Multiple Imputation
  "VIM", # visualising missing data patterns and imputing missing data with fast algorithms
  "mice", "Amelia", "mi", # multiple imputation tools, Amelia for longitudinal data
  "missMDA", # great for single imputation, however, unstable
  "mosaic", # some handy extension to stat functions, e.g. formula interface, etc.
  "packrat", # facilitates Reproducible Research - Package Versioning by RStudio
  "checkpoint", # facilitates Reproducible Research - Package Versioning by Revolution Analytics

  ## Building APIs with R
  "plumber", # easy creation of R webservice by just code decoration
  "fiery", "routr", # a web-server and web-socket server and routr for routing of HTTP and WebSocket in R, great for web service dev
  "jug", # small web development framework for R, make building APIs for your code as easy as possible

  ## Machine Learning and Statistical Modeling
  "h2o", # great Machine Learning library, AutoML
  "rpart", # decision trees
  "rpart.plot",
  "party",
  # "randomForest", "rpart", "rpart.plot",
  "xgboost", # high speed gbm implementation
  "gbm", # Generalized Boosted Regression Models
  "glmnet", # lasso and elastic-net regularized generalized linear models
  "geepack", # GEE: alternative to Linear Mixed Models for correlated data i.e. time series, clustered cohorts, ...
  "contrast", "multcomp", # great for contrasts in linear models: contrast:contrast function and
                          # multcomp:glht function. Like the lincom command in Stata
  "tree", # classification and regression trees
  "randomForest", # classification and regression based on a forest of trees using random inputs
  "mclust", # Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation
  "car", # Companion to Applied Regression. Esp. useful for ANOVA tables.
  "lme4", # linear mixed-effects models using S4 classes
  "mvtnorm", # multivariate Normal and t Distributions
  # specially useful:
  # * car::recode() --> for easy recoding of any variables
  # * car::linear.hypothesis() --> like 'lincom' command in Stata, alternative is survey::svycontrast()
  "rms", # regression modelling tools
  # specially useful:
  # * rms::datadist() --> like 'adjust' command in Stata

  ## Text mining, string distances, NLP
  "tm", # a framework for text mining applications within R
  "stringdist", # Approximate String Matching and String Distance Functions
  "pwr", # Power Analysis

  # rOpenScience and rOpenHealth Packages
  "rentrez", # talk with NCBI entrez using R
  "RISmed", # talk with NCBI entrez using R
  "rclinicaltrials", # an interface to clinicaltrials.gov - fetches studies meta-data and study data
  "rcrossref", # crossref API
  "fulltext", # download fulltext articles using R
  "BerlinData", # BerlinData: Easy access to Berlin related data
  "rHealthDataGov", # This package provides an R interface to the HealthData.gov data API
  "rsnps", # Search and retrieve Single Nucleotide Polymorphism data from openSNP
  "gistr", # a light interface to GitHub's gists for R.
  "gtrendsR", # R Functions to Perform and Display Google Trends Queries

  # Interfaces to the cloud / cloud services
  "googlesheets", # Google Spreadsheets R API
  "rdrop2", # Programmatic Interface to the 'Dropbox' API
  "Rmonkey", # A Survey Monkey R Client

  # robust
  "rms",
  "robustbase", # lm and glm without relying on distributional assumptions
  "roxygen2", # intra source documentation - hadleyverse

  # databases -----------
  "RPostgreSQL", # interface to postgresql
  "MonetDBLite", # in-process database engine including dplyr backend - ultra-fast column-based storage
  "RSQLite", # in-process database engine including dplyr backend - rock-solid, row-based storage
  "RSQLite.extfuns",
  "stringr", # Hadley Wickham's string processing package
  "swirl" # interactive R tutorial
)

# union() removes the names that appear more than once across (and within)
# the two lists, so every package is requested only once.
install.packages(union(x, y))

## github package
z <- c(
  "google/CausalImpact", # Estimating causal effects in time series, http://google.github.io/CausalImpact/, https://jjmullz.shinyapps.io/causal-impact/
  "haozhu233/ezsummary", # Summary stats tables making use of dplyr
  "trinker/wakefield" # wakefield is designed to quickly generate random data sets.
)
devtools::install_github(z)