#'
#' This script does some basic filtering of the ecoevojobs list to narrow
#' down jobs. Much of the filtering is based on two text files: one listing
#' locations to keep (`locations.txt`) and the other listing institutions to
#' keep (`institutions.txt`).
#' Each file has one item per line.
#' For example, the top of `locations.txt` might look like this:
#' ```
#' Alabama
#' Alaska
#' Arizona
#' Arkansas
#' California
#' Colorado
#' ```
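#'
#' Similarly, `institutions.txt` holds one institution name per line, matched
#' exactly against the sheet's `Institution` column (via `%in%` below).
#' A few illustrative entries (these names are hypothetical examples, not
#' from the original file):
#' ```
#' University of Wisconsin-Madison
#' Oregon State University
#' University of Georgia
#' ```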
#'
library(googlesheets4)
library(tidyverse)
library(lubridate)
#' ecoevojobs is public, so no need to authenticate
gs4_deauth()
#' Date-time when you last manually curated the institution list.
#' Because I build my institution list from what's currently on ecoevojobs,
#' any new jobs that pop up after that curation could be missed if I keep
#' using an outdated institutions list.
#'
#' ** DON'T JUST CHANGE THIS TO THE CURRENT DATE-TIME WITHOUT UPDATING **
#' ** THE `institutions.txt` FILE. **
#'
#' See `>>>>>>>>>>>>>>>>>>>>>>>>` below for how to update this.
#'
curation_dt <- as.POSIXct("2022-11-02 09:24:00", tz = "America/Los_Angeles")
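#' (A small optional guard, not part of the original workflow: warn yourself
#' when the curated list looks stale. The two-week threshold here is an
#' arbitrary assumption; adjust to taste.)
if (difftime(Sys.time(), curation_dt, units = "days") > 14) {
  warning("`institutions.txt` may be outdated; see the update steps below")
}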
# Curated list of institutions I'm interested in:
insts <- read_lines("institutions.txt")
# List of locations I'm interested in:
locs <- read_lines("locations.txt") |>
  # In mine, I have some comments that specify how I generated this list,
  # which I want to skip here:
  discard(~ str_starts(.x, "#"))
#' Fix weird dates in the `Review Date` column, which mixes real dates with
#' free text like "early Nov" or "mid-October":
date_fixer <- function(bad_dates) {
  better_dates <- map(bad_dates, function(d) {
    if (is.null(d)) {
      as.Date(NA)
    } else if (is.character(d)) {
      d <- tolower(d)
      # Regex that matches any lowercase month abbreviation:
      abbr_names <- paste(tolower(month.abb), collapse = "|")
      if (str_detect(d, abbr_names)) {
        m_ind <- map_lgl(tolower(month.abb), ~ str_detect(d, .x)) |>
          which()
        if (length(m_ind) > 1) stop("multiple months found")
        # "early <month>" -> the 1st; "mid <month>" or anything else -> the 15th:
        d_ind <- case_when(str_detect(d, "mid") ~ 15L,
                           str_detect(d, "early") ~ 1L,
                           TRUE ~ 15L)
        as.Date(sprintf("%s-%i-%i", year(today()), m_ind, d_ind))
      } else {
        as.Date(NA)
      }
    } else {
      as.Date(d)
    }
  })
  # `unlist()` strips the Date class, so restore it:
  good_dates <- better_dates |>
    unlist() |>
    as.Date(origin = "1970-01-01")
  return(good_dates)
}
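#' A quick sanity check on hypothetical inputs (the real argument is the
#' list column that `read_sheet()` returns for `Review Date`, which mixes
#' dates, free text, and NULLs); the output shown assumes a run in 2022:
date_fixer(list(as.Date("2022-10-15"), "early Nov", "mid-Dec", NULL))
#> [1] "2022-10-15" "2022-11-01" "2022-12-15" NA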
#' Table of jobs directly from ecoevojobs (this can take a few tries):
ecoevo_df <- paste0("https://docs.google.com/spreadsheets/d/",
                    "1cqTuSeLtH-Zw7X9ZtnhQxzw3r19Rya9nzdqRW9apTmY/edit#gid=865906911") |>
  read_sheet(sheet = "Faculty / Permanent Jobs", skip = 1) |>
  #' With fixed review dates:
  mutate(`Review Date` = date_fixer(`Review Date`))
#' >>>>>>>>>>>>>>>>>>>>>>>>
#' How to update the curated institutions list:
#' Run this to identify unique institutions from new job postings,
#' manually add any you're interested in to the `institutions.txt` file,
#' then re-run the command `insts <- read_lines("institutions.txt")`.
ecoevo_df |>
  filter(Timestamp > curation_dt) |>
  getElement("Institution") |>
  unique() |>
  sort()
#'
#' Filter for...
#' 1. Tenure-track positions that allow assistant professors (including open rank)
#' 2. Location is in your list of desired ones
#' 3. Institution is in your list of desired ones
#' 4. Review date is no more than a week past due
#'
job_df <- ecoevo_df |>
  filter(Appointment == "Tenure Track",
         Rank %in% c("Asst / Assoc Prof", "Asst or Assoc Prof", "Asst Prof",
                     "Open Rank", "Rank Open"),
         Location %in% locs,
         Institution %in% insts,
         `Review Date` > (Sys.Date() - 7))
#'
#' Now I write this to a CSV file to manually search by subject area.
#'
write_csv(job_df, sprintf("filtered_jobs_%s.csv", Sys.Date()))