Last active
November 2, 2022 17:17
-
-
Save lucasnell/f7d47ee906376a0ade59fc913abd2a29 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #' | |
| #' This does some basic filtering of the ecoevojobs list to narrow down jobs. | |
| #' Much of the filtering is based on two text files, one containing locations | |
| #' to filter for (`locations.txt`), and the other for institutions to filter | |
| #' for (`institutions.txt`). | |
| #' Each of these files has its items separated by newlines. | |
| #' For example, the top of `locations.txt` might look like this: | |
| #' ``` | |
| #' Alabama | |
| #' Alaska | |
| #' Arizona | |
| #' Arkansas | |
| #' California | |
| #' Colorado | |
| #' ``` | |
| #' | |
| library(googlesheets4) | |
| library(tidyverse) | |
| library(lubridate) | |
#' The ecoevojobs sheet is public, so there is no need to authenticate.
googlesheets4::gs4_deauth()
#' Date-time of the last manual curation of the institution list.
#'
#' The institution list is built from what is posted on ecoevojobs, so any
#' job that appears after the curation may come from an institution that was
#' never considered, and an outdated list would miss it.
#'
#' ** DON'T JUST CHANGE THIS TO THE CURRENT DATE-TIME WITHOUT ALSO UPDATING **
#' ** THE `institutions.txt` FILE. **
#'
#' See `>>>>>>>>>>>>>>>>>>>>>>>>` below for how to update this.
#'
curation_dt <- as.POSIXct(
  "2022-11-02 09:24:00",
  format = "%Y-%m-%d %H:%M:%S",
  tz = "America/Los_Angeles"
)
# Institutions I'm interested in, one per line:
insts <- read_lines("institutions.txt")

# Locations I'm interested in, one per line. My file also holds comment lines
# (starting with "#") describing how the list was generated; keep only the
# lines that are not comments.
locs <- read_lines("locations.txt") |>
  keep(\(line) !str_starts(line, "#"))
#' Fix weird dates in `Review Date` column
#'
#' Converts a column whose cells mix real date-times with free text such as
#' "mid Nov" into a plain `Date` vector. Each cell is handled as follows:
#'
#' * a single `Date` / `POSIXt` value is converted with `as.Date()`;
#' * a single string containing exactly one lowercase three-letter month
#'   abbreviation (matched anywhere in the lowercased text) becomes a date in
#'   that month of `ref_date`'s year: the 1st when the text contains "early"
#'   (and not "mid"), otherwise the 15th;
#' * anything else (`NULL`, `NA`, text without a month, numbers, ...) becomes
#'   `NA` instead of raising an error or producing a bogus date.
#'
#' An error ("multiple months found") is raised when one string matches more
#' than one month abbreviation.
#'
#' @param bad_dates List (or vector) of cells, one element per row.
#' @param ref_date `Date` whose year is given to text-only dates. Defaults to
#'   the current date.
#' @return A `Date` vector with one element per element of `bad_dates`.
date_fixer <- function(bad_dates, ref_date = Sys.Date()) {
  # Neither of these depends on the cell, so compute them once.
  month_abbrs <- tolower(month.abb)
  # NOTE(review): text-only dates always get `ref_date`'s year, so "Jan 15"
  # read in November lands in the past -- confirm that is acceptable.
  ref_year <- as.integer(format(ref_date, "%Y"))

  # Converts one cell to days since 1970-01-01 (NA_real_ when unusable), so
  # results can be collected type-stably and classed as `Date` once at the end.
  fix_one <- function(d) {
    if (length(d) != 1L) {
      return(NA_real_) # covers NULL (empty cell) and non-scalar cells
    }
    if (inherits(d, c("Date", "POSIXt"))) {
      return(as.numeric(as.Date(d)))
    }
    if (!is.character(d) || is.na(d)) {
      return(NA_real_)
    }

    d <- tolower(d)
    m_ind <- which(vapply(
      month_abbrs,
      \(m) grepl(m, d, fixed = TRUE),
      logical(1)
    ))
    if (length(m_ind) == 0L) {
      return(NA_real_)
    }
    if (length(m_ind) > 1L) stop("multiple months found")

    # "mid" takes precedence over "early"; no qualifier also means mid-month.
    is_early <- grepl("early", d, fixed = TRUE) && !grepl("mid", d, fixed = TRUE)
    d_ind <- if (is_early) 1L else 15L

    as.numeric(as.Date(
      sprintf("%d-%02d-%02d", ref_year, m_ind, d_ind),
      format = "%Y-%m-%d"
    ))
  }

  day_counts <- vapply(bad_dates, fix_one, numeric(1), USE.NAMES = FALSE)
  as.Date(day_counts, origin = "1970-01-01")
}
#' Table of jobs read directly from ecoevojobs (this can take a few tries),
#' with the `Review Date` column cleaned up by `date_fixer()`.
ecoevo_df <- read_sheet(
  paste0(
    "https://docs.google.com/spreadsheets/d/",
    "1cqTuSeLtH-Zw7X9ZtnhQxzw3r19Rya9nzdqRW9apTmY/edit#gid=865906911"
  ),
  sheet = "Faculty / Permanent Jobs",
  skip = 1
) |>
  mutate(`Review Date` = date_fixer(`Review Date`))
#' >>>>>>>>>>>>>>>>>>>>>>>>
#' How to update the curated institutions list:
#' run the expression below to list the unique institutions among postings
#' newer than `curation_dt`, manually add the ones you're interested in to the
#' `institutions.txt` file, then re-run the command
#' `insts <- read_lines("institutions.txt")`.
ecoevo_df |>
  filter(Timestamp > curation_dt) |>
  pull(Institution) |>
  unique() |>
  sort()
#'
#' Narrow the table down, one criterion per `filter()` step:
#' 1. Tenure track posts that allow assistant professor (including open rank)
#' 2. Location is in your list of desired ones
#' 3. Institution is in your list of desired ones
#' 4. Review date can't be more than a week past due
#'
#' Rows whose `Review Date` is `NA` fail the last comparison and are dropped.
#'
job_df <- ecoevo_df |>
  filter(Appointment == "Tenure Track") |>
  filter(
    Rank %in% c(
      "Asst / Assoc Prof", "Asst or Assoc Prof", "Asst Prof",
      "Open Rank", "Rank Open"
    )
  ) |>
  filter(Location %in% locs) |>
  filter(Institution %in% insts) |>
  filter(`Review Date` > (Sys.Date() - 7))
#'
#' Save the filtered jobs to a CSV file stamped with today's date, so they can
#' be searched manually by subject area.
#'
job_df |>
  write_csv(sprintf("filtered_jobs_%s.csv", Sys.Date()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment