Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active November 16, 2024 21:52
Show Gist options
  • Save benmarwick/0fe946d173b9e9e2d47373df9650712a to your computer and use it in GitHub Desktop.
Save benmarwick/0fe946d173b9e9e2d47373df9650712a to your computer and use it in GitHub Desktop.

Revisions

  1. benmarwick revised this gist Nov 16, 2024. 1 changed file with 56 additions and 0 deletions.
    56 changes: 56 additions & 0 deletions prepare-radiocarbon-data-for-analyses-signbase.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,56 @@
    library(tidyverse)
    library(rcarbon)

    signbase <- read_csv("signBase_Version1.0.csv")

    # clean up and separate the data in the radiocarbon age columns
    # so we can use it for plotting and analyses. This will generate some
    # warnings, that's ok, nothing to worry about
    signbase_years <-
    signbase %>%
    drop_na(date_bp_max_min) %>%
    mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+\\/\\-", "±")) %>%
    mutate(date_bp_max_min = str_replace_all(date_bp_max_min, "\\+", "±")) %>%
    separate(date_bp_max_min,
    sep = " - ",
    c("date_bp_max", "date_bp_min"),
    remove = FALSE) %>%
    mutate(date_bp_max = ifelse(str_detect(date_bp_max, "\\/"),
    str_extract(date_bp_max, ".*?(?=\\/)"),
    date_bp_max)) %>%
    mutate(date_bp_min = ifelse(str_detect(date_bp_min, "\\/"),
    str_extract(date_bp_min, ".*?(?=\\/)"),
    date_bp_min)) %>%
    separate(date_bp_max,
    sep = "±",
    c("date_bp_max_age",
    "date_bp_max_error")) %>%
    separate(date_bp_min,
    sep = "±",
    c("date_bp_min_age",
    "date_bp_min_error")) %>%
    drop_na(date_bp_max_age,
    date_bp_max_error) %>%
    mutate(date_bp_max_age = parse_number(date_bp_max_age),
    date_bp_max_error = parse_number(date_bp_max_error))

    # calibrate the radiocarbon ages and join the calibration results back
    # to the main data frame of all the object, location, and sign data.
    # Use this signbase_years_cal data frame for all your anlyses now.
    # Use the column "MedianBP" for visualising and analysing ages. That
    # is calendar years Before the Present
    signbase_years_cal <-
    calibrate(signbase_years$date_bp_max_age,
    signbase_years$date_bp_max_error,
    ids = signbase_years$object_id,
    verbose = FALSE) %>%
    summary() %>%
    tibble() %>%
    left_join(signbase,
    join_by("DateID" == "object_id"))

    # for example, we can make a histogram to see which time periods are most
    # frequently represented in the data
    ggplot(signbase_years_cal) +
    aes(MedianBP) +
    geom_histogram()
  2. benmarwick renamed this gist Nov 16, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. benmarwick created this gist Nov 16, 2024.
    42 changes: 42 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,42 @@
    library(tidyverse)
    library(rnaturalearth)
    library(ggrepel)

    # import the data from the paper https://www.nature.com/articles/s41597-020-00704-x
    signbase <- read_csv("signBase_Version1.0.csv")

    # prepare simple features objects for a base map of the coastlines
    # and country boundaries of Europe
    world <- ne_countries(scale = "medium", returnclass = "sf")
    Europe <- world[which(world$continent == "Europe"),]

    # make a map of the signbase sites!
    ggplot() +
    # plot the base map of European countries and coastlines
    geom_sf(data = Europe) +
    # crop the map to zoom in on the parts of Europe where the sites are
    coord_sf(xlim = c(-10,30),
    ylim = c(35,53),
    expand = FALSE) +
    # add the layer of signbase sites, here I'm using the raw data, but you can
    # also use a data frame that you have done some calculations on. You just need
    # the longitude and latitude columns to be present for this to work
    geom_point(data = signbase,
    # point colour indicates 'country', feel free to explore and
    # change this to other columns in the signbase_sf data frame
    aes(x = longitude,
    y = latitude,
    # point colour indicates 'country', feel free to explore and
    # change this to other columns in the signbase data frame
    colour = country)) +
    # add labels for the site names
    geom_text_repel(data = signbase %>%
    # only one label per site, otherwise it's too croweded to read
    distinct(site_name,
    .keep_all = TRUE),
    aes(x = longitude ,
    y = latitude,
    label = site_name),
    max.overlaps = 100,
    size = 2) +
    theme_minimal()