library(tidyverse)
library(lubridate)
library(glue)
library(rvest)
library(janitor)

#' Scrape one page of an NFL.com category-stats result table.
#'
#' Appends the pagination query parameter to `url`, downloads the page,
#' and returns the `#result` table as a tibble with cleaned column names.
#'
#' @param url Base category-stats URL (already containing a query string).
#' @param page Page number to request (substituted into `d-447263-p`).
#' @return A tibble with one row per table row on the requested page.
scrape_nfl_table <- function(url, page) {
  page_url <- glue("{url}&d-447263-p={page}")

  page_url %>%
    read_html() %>%
    html_nodes("#result") %>%
    html_table() %>%
    flatten_df() %>%
    # Columns are addressed by position because names are only cleaned below.
    mutate_at(c(1, 5:8, 10:15, 17:19), as.numeric) %>%
    # Column 9 is forced to character so it has one type on every page
    # before it is parsed as a number under its cleaned name `yds`.
    mutate_at(9, as.character) %>%
    clean_names() %>%
    mutate_at("yds", parse_number)
}

#' Download every page of an NFL.com category-stats query.
#'
#' Sleeps five seconds before issuing any request, reads the pagination
#' element of the first page to discover the available page numbers, then
#' scrapes each page and row-binds the results.
#'
#' @param archive Ignored: the value is always recomputed from `season`
#'   ("false" for 2018, "true" otherwise). Kept so that callers passing it
#'   (e.g. via `pmap()` over a tibble with an `archive` column) still work.
#' @param conference Value for the `conference` query parameter.
#' @param statisticCategory Value for the `statisticCategory` query
#'   parameter (e.g. "PASSING").
#' @param season Season year; also determines the `archive` flag.
#' @param seasonType Value for the `seasonType` query parameter
#'   (e.g. "REG").
#' @return A tibble with the rows of all pages. If no page numbers are
#'   found in the pagination element, no page is scraped and the result
#'   is empty.
pull_nfl_statistics <- function(archive,
                                conference,
                                statisticCategory,
                                season,
                                seasonType) {
  # Pause between successive queries.
  Sys.sleep(5)

  # NOTE(review): 2018 is presumably the "current" (non-archived) season
  # at the time of writing; confirm before using for later seasons.
  archive <- if (season != 2018) "true" else "false"

  url <- glue("http://www.nfl.com/stats/categorystats?archive={archive}&conference={conference}&statisticCategory={statisticCategory}&season={season}&seasonType={seasonType}&experience=&tabSeq=0&qualified=true&Submit=Go")

  # Extract whole page numbers: "\\d+" keeps multi-digit pages such as
  # "10" intact (a bare "\\d" would split it into "1" and "0").
  # `unique()` guards against requesting the same page twice.
  pages <- read_html(url) %>%
    html_nodes("#main-content > div.c > div.grid > div.col.span-12 > form > span:nth-child(4)") %>%
    html_text() %>%
    str_extract_all("\\d+") %>%
    unlist() %>%
    unique()

  map_dfr(pages, ~ scrape_nfl_table(url, .x))
}

# One row per query to run; column names match the arguments of
# `pull_nfl_statistics()` so that `pmap()` can pass them by name.
scaffold <- tibble(
  archive = "true",
  conference = "null",
  statisticCategory = "PASSING",
  season = 2018,
  seasonType = "REG"
)

output <- pmap(scaffold, pull_nfl_statistics)