lucasnell · November 2, 2022 17:17 · Nov 2, 2022 · Nov 2, 2022
diff --git a/filter-ecoevojobs.R b/filter-ecoevojobs.R
@@ -106,8 +106,7 @@ job_df <- ecoevo_df |>
                        "Open Rank", "Rank Open"),
            Location %in% locs,
            Institution %in% insts,
-           `Review Date` > (Sys.Date() - 7)) |>
-    select(Institution, `Subject Area`, `Review Date`, URL, Rank, Notes)
+           `Review Date` > (Sys.Date() - 7))
 
 
 #'

diff --git a/filter-ecoevojobs.R b/filter-ecoevojobs.R
@@ -0,0 +1,117 @@
+
+#'
+#' This does some basic filtering of the ecoevojobs list to narrow down jobs.
+#' Much of the filtering is based on two text files, one containing locations
+#' to filter for (`locations.txt`), and the other for institutions to filter
+#' for (`institutions.txt`).
+#' Each of these files has one item per line.
+#' For example, the top of `locations.txt` might look like this:
+#' ```
+#' Alabama
+#' Alaska
+#' Arizona
+#' Arkansas
+#' California
+#' Colorado
+#' ```
+#'
+
+library(googlesheets4)
+library(tidyverse)
+library(lubridate)
+
+#' ecoevojobs is public, so no need to authenticate
+gs4_deauth()
+
+
+#' Date-time you last manually curated the institution list.
+#' Because I create my institution list based on what's on ecoevojobs,
+#' if there are new jobs that pop up after my curation, then I might miss them
+#' if I used an outdated institutions list.
+#'
+#' ** DON'T JUST CHANGE THIS TO CURRENT DATE-TIME WITHOUT UPDATING  **
+#' ** THE `institutions.txt` FILE.                                  **
+#'
+#' See `>>>>>>>>>>>>>>>>>>>>>>>>` below for how to update this
+#'
+curation_dt <- as.POSIXct("2022-11-02 09:24:00", tz = "America/Los_Angeles")
+
+# Institutions I'm interested in, one per line:
+insts <- read_lines("institutions.txt")
+
+# Locations I'm interested in, one per line. My file also has `#` comment
+# lines describing how I generated the list, so those are dropped here:
+locs <- read_lines("locations.txt")
+locs <- locs[!str_starts(locs, "#")]
+
+#' Turn the mixed-type `Review Date` cells into one `Date` vector.
+#' `NULL` cells and text naming no month become `NA`. Text naming one month
+#' becomes the 1st ("early" without "mid") or 15th of that month, in whichever
+#' of last/this/next year is closest to today, so "mid Jan" read in November
+#' means next January. Non-text cells go through `as.Date()`.
+#' @param bad_dates List (or vector) of raw `Review Date` cells.
+#' @return `Date` vector with one element per cell of `bad_dates`.
+date_fixer <- function(bad_dates) {
+    months_lc <- tolower(month.abb)
+    abbr_names <- paste(months_lc, collapse = "|")
+    better_dates <- map(bad_dates, function(d) {
+        if (is.null(d)) return(as.Date(NA))
+        if (!is.character(d)) return(as.Date(d))
+        d <- tolower(d)
+        # `isTRUE()` so that an `NA` string yields `NA` instead of an error
+        if (!isTRUE(str_detect(d, abbr_names))) return(as.Date(NA))
+        m_ind <- which(map_lgl(months_lc, ~ str_detect(d, .x)))
+        if (length(m_ind) > 1) stop("multiple months found")
+        is_early <- str_detect(d, "early") && !str_detect(d, "mid")
+        d_ind <- if (is_early) 1L else 15L
+        # Same month/day in the previous, current, and next calendar year
+        yrs <- year(today()) + -1:1
+        cands <- as.Date(sprintf("%s-%i-%i", yrs, m_ind, d_ind))
+        cands[which.min(abs(as.numeric(cands - today())))]
+    })
+    # `unlist()` strips the Date class; rebuild it from days since the epoch
+    as.Date(as.numeric(unlist(better_dates)), origin = "1970-01-01")
+}
+
+#' Pull the jobs table straight from ecoevojobs (may need a few attempts):
+ecoevo_df <- read_sheet(paste0("https://docs.google.com/spreadsheets/d/",
+       "1cqTuSeLtH-Zw7X9ZtnhQxzw3r19Rya9nzdqRW9apTmY/edit#gid=865906911"),
+    sheet = "Faculty / Permanent Jobs", skip = 1)
+#' Swap the raw review dates for the cleaned-up ones from `date_fixer()`:
+ecoevo_df$`Review Date` <- date_fixer(ecoevo_df$`Review Date`)
+
+
+
+#' >>>>>>>>>>>>>>>>>>>>>>>>
+#' Updating the curated institutions list:
+#' the pipeline below prints the unique institutions from job postings whose
+#' timestamp is later than `curation_dt`. Manually add the ones you want to
+#' `institutions.txt`, then re-run `insts <- read_lines("institutions.txt")`
+ecoevo_df |>
+    filter(Timestamp > curation_dt) |>
+    pull(Institution) |>
+    unique() |>
+    sort()
+
+#'
+#' Filter for...
+#' 1. Tenure track posts that allow assistant professor (including open rank)
+#' 2. Location is in your list of desired ones
+#' 3. Institution is in your list of desired ones
+#' 4. Review date can't be more than a week past due
+#'
+job_df <- ecoevo_df |>
+    filter(Appointment == "Tenure Track") |>
+    filter(Rank %in% c("Asst / Assoc Prof", "Asst or Assoc Prof", "Asst Prof",
+                       "Open Rank", "Rank Open")) |>
+    filter(Location %in% locs, Institution %in% insts) |>
+    # Rows whose review date is `NA` are dropped by this comparison too
+    filter(`Review Date` > (Sys.Date() - 7)) |>
+    select(Institution, `Subject Area`, `Review Date`, URL, Rank, Notes)
+
+
+#'
+#' Write the filtered jobs to `filtered_jobs_<today's date>.csv` (a relative
+#' path), so that I can manually search them by subject area.
+
+write_csv(job_df, sprintf("filtered_jobs_%s.csv", Sys.Date()))
No results found