Last active
November 2, 2022 17:17
-
-
Save lucasnell/f7d47ee906376a0ade59fc913abd2a29 to your computer and use it in GitHub Desktop.
Revisions
-
lucasnell revised this gist
Nov 2, 2022 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -106,8 +106,7 @@ job_df <- ecoevo_df |> "Open Rank", "Rank Open"), Location %in% locs, Institution %in% insts, `Review Date` > (Sys.Date() - 7)) #' -
lucasnell created this gist
Nov 2, 2022 .There are no files selected for viewing
#'
#' This does some basic filtering of the ecoevojobs list to narrow down jobs.
#' Much of the filtering is based on two text files, one containing locations
#' to filter for (`locations.txt`), and the other for institutions to filter
#' for (`institutions.txt`).
#' Each of these has items separated by newlines.
#' For example, the top of `locations.txt` might look like this:
#' ```
#' Alabama
#' Alaska
#' Arizona
#' Arkansas
#' California
#' Colorado
#' ```
#'

library(googlesheets4)
library(tidyverse)
library(lubridate)

#' ecoevojobs is public, so no need to authenticate
gs4_deauth()

#' Date-time you last manually curated the institution list.
#' Because I create my institution list based on what's on ecoevojobs,
#' if there are new jobs that pop up after my curation, then I might miss them
#' if I used an outdated institutions list.
#'
#' ** DON'T JUST CHANGE THIS TO CURRENT DATE-TIME WITHOUT UPDATING **
#' ** THE `institutions.txt` FILE. **
#'
#' See `>>>>>>>>>>>>>>>>>>>>>>>>` below for how to update this
#'
curation_dt <- as.POSIXct("2022-11-02 09:24:00", tz = "America/Los_Angeles")

#' List of institutions I'm interested in (one per line):
insts <- read_lines("institutions.txt")

#' List of locations I'm interested in:
locs <- read_lines("locations.txt") |>
  # In mine, I have some comments that specify how I generated this list,
  # which I want to skip here:
  discard(~ str_starts(.x, "#"))

#' Fix weird dates in `Review Date` column
#'
#' The column arrives as a list whose elements are handled one at a time:
#'
#' * `NULL` or a single missing value becomes `NA`.
#' * Free text such as "mid Jan" or "early Dec" becomes a date: the month is
#'   found by searching for the lower-cased `month.abb` abbreviations
#'   (substring match), the day is 1 when the text says "early" (and not
#'   "mid") and 15 otherwise. Text without a month abbreviation becomes `NA`.
#' * Anything else is passed to `as.Date()`.
#'
#' Free text carries no year, so the year is chosen from last / this / next
#' year, whichever puts the date closest to today. Always using the current
#' year would turn a January deadline seen in November into a date ten months
#' in the past, which the "past due" filter further down would then drop.
#'
#' @param bad_dates List (or vector) of raw `Review Date` values.
#' @return A `Date` vector built from one value per element of `bad_dates`.
#'   Text naming more than one month raises the error
#'   "multiple months found".
date_fixer <- function(bad_dates) {
  # Invariants hoisted out of the per-element function.
  month_abbrs <- tolower(month.abb)
  month_regex <- paste(month_abbrs, collapse = "|")
  ref_date <- today()

  # Build month/day in the year (previous, current or next) nearest ref_date.
  nearest_year_date <- function(m_ind, d_ind) {
    candidates <- as.Date(sprintf("%s-%i-%i", year(ref_date) + (-1:1),
                                  m_ind, d_ind))
    candidates[which.min(abs(as.numeric(candidates - ref_date)))]
  }

  fix_one <- function(d) {
    # Missing cells: NULL, or a scalar NA of any type (guards `if (NA)` below).
    if (is.null(d) || (length(d) == 1L && is.na(d))) {
      return(as.Date(NA))
    }
    if (!is.character(d)) {
      return(as.Date(d))
    }
    d <- tolower(d)
    if (!str_detect(d, month_regex)) {
      return(as.Date(NA))
    }
    # One string against all twelve abbreviations at once.
    m_ind <- which(str_detect(d, month_abbrs))
    if (length(m_ind) > 1) stop("multiple months found")
    # "mid" takes precedence over "early"; anything else defaults to the 15th.
    d_ind <- if (str_detect(d, "mid")) {
      15L
    } else if (str_detect(d, "early")) {
      1L
    } else {
      15L
    }
    nearest_year_date(m_ind, d_ind)
  }

  fixed <- map(bad_dates, fix_one)
  # unlist() strips the Date class and leaves day counts since 1970-01-01;
  # as.numeric() turns the NULL produced by empty input into numeric(0).
  as.Date(as.numeric(unlist(fixed)), origin = "1970-01-01")
}

#' Table of jobs directly from ecoevojobs (this can take a few tries):
ecoevo_df <- paste0("https://docs.google.com/spreadsheets/d/",
                    "1cqTuSeLtH-Zw7X9ZtnhQxzw3r19Rya9nzdqRW9apTmY/edit#gid=865906911") |>
  read_sheet(sheet = "Faculty / Permanent Jobs", skip = 1) |>
  # With fixed review dates:
  mutate(`Review Date` = date_fixer(`Review Date`))

#' >>>>>>>>>>>>>>>>>>>>>>>>
#' How to update curated institutions list.
#' Run this to identify unique institutions from new job postings,
#' manually add any you're interested in to `institutions.txt` file,
#' then re-run the command `insts <- read_lines("institutions.txt")`
ecoevo_df |>
  filter(Timestamp > curation_dt) |>
  getElement("Institution") |>
  unique() |>
  sort()

#'
#' Filter for...
#' 1. Tenure track posts that allow assistant professor (including open rank)
#' 2. Location is in your list of desired ones
#' 3. Institution is in your list of desired ones
#' 4. Review date can't be more than a week past due
#'
#' Note that `filter()` drops rows whose condition is `NA`, so postings whose
#' review date could not be parsed are excluded as well.
#'
job_df <- ecoevo_df |>
  filter(Appointment == "Tenure Track",
         Rank %in% c("Asst / Assoc Prof", "Asst or Assoc Prof", "Asst Prof",
                     "Open Rank", "Rank Open"),
         Location %in% locs,
         Institution %in% insts,
         `Review Date` > (Sys.Date() - 7)) |>
  select(Institution, `Subject Area`, `Review Date`, URL, Rank, Notes)

#'
#' Now I write this to a CSV file to manually search by subject area.
#'
write_csv(job_df, sprintf("filtered_jobs_%s.csv", Sys.Date()))