Created
July 30, 2018 13:17
-
-
Save cwickham/d66f8fc1b59a84284ce90adbfcea9b83 to your computer and use it in GitHub Desktop.
Revisions
-
cwickham created this gist
Jul 30, 2018 .There are no files selected for viewing
# Import the HURDAT2 hurricane track listing published at `url` and tidy it
# into one row per observation, with the storm header info (id, name,
# number of records) attached to every row.

library(tidyverse)
library(xml2)

url <- "http://www.aoml.noaa.gov/hrd/hurdat/hurdat2-nepac.html"

# Column names ------------------------------------------------------------
# From: http://www.aoml.noaa.gov/hrd/hurdat/newhurdat-format.pdf
# for the data lines.
# Wind radii columns: the wind threshold varies slowest and the quadrant
# fastest (winds_34_NE, winds_34_SE, ..., winds_64_NW). Built with base R
# rather than the deprecated purrr::cross_df(); the order is unchanged.
wind_vars <- paste0(
  "winds_",
  rep(c("34", "50", "64"), each = 4),
  "_",
  c("NE", "SE", "SW", "NW")
)

col_names <- c(
  "date", "time", "record_id", "status", "lat", "lon",
  "max_wind", "min_pressure", wind_vars, "empty"
)

# Import ------------------------------------------------------------------
# The data sit inside the first <pre> element of the page. Header lines and
# data lines share the same leading columns, so every column that holds
# header text or a suffixed coordinate is read as character explicitly
# instead of relying on type guessing.
hurricanes <- read_html(url) %>%
  xml_find_first(".//pre") %>%
  xml_text() %>%
  read_csv(
    col_names = col_names,
    col_types = cols(
      date = col_character(),
      time = col_character(),
      record_id = col_character(),
      lat = col_character(),
      lon = col_character()
    )
  )

# Warnings correspond to header lines (they have fewer fields than data lines)
problems(hurricanes)

# Pull apart header and data rows -----------------------------------------
# Find headers based on the first two characters in the first column (date)
# specifying the basin; data rows start with digits. `id` increments at each
# header, so it labels every row with the storm it belongs to.
hurricanes <- hurricanes %>%
  mutate(
    header = str_detect(date, "^[A-Z]{2}"),
    id = cumsum(header)
  )

# Now for each hurricane add header info as columns to data.
# Within a storm the first row is the header line, whose first three fields
# are stored in `date`, `time` and `record_id`; slice(-1) then drops it.
hurricanes_tidy <- hurricanes %>%
  group_by(id) %>%
  mutate(
    cyclone_id = first(date),
    name = first(time),
    n_records = first(record_id)
  ) %>%
  slice(-1) %>%
  ungroup()

# and fix up a few data types.
# Coordinates are expected to carry a hemisphere suffix (e.g. "28.0N",
# "94.8W" -- see the format document above). parse_number() alone drops the
# suffix and with it the sign, so south / west values are negated here.
hurricanes_clean <- hurricanes_tidy %>%
  mutate(
    datetime = lubridate::ymd_hm(paste(date, time, sep = "T")),
    date = lubridate::date(datetime),
    time = hms::as_hms(datetime),
    lat = parse_number(lat) * if_else(str_detect(lat, "S$"), -1, 1),
    lon = parse_number(lon) * if_else(str_detect(lon, "W$"), -1, 1),
    n_records = as.integer(n_records)
  )

# some quick checks -------------------------------------------------------
# number of records matches that reported in each storm's header line
hurricanes_clean %>%
  group_by(cyclone_id) %>%
  summarise(
    n = n(),
    n_records = first(n_records),
    match = n == n_records
  ) %>%
  summarise(all(match))

# Quick messy plot: one path per storm
hurricanes_clean %>%
  ggplot(aes(lon, lat)) +
  geom_path(aes(group = cyclone_id))