Skip to content

Instantly share code, notes, and snippets.

@bsurial
Last active December 6, 2023 12:53
Show Gist options
  • Select an option

  • Save bsurial/e057a685b231e193ee6e394c1ce429b0 to your computer and use it in GitHub Desktop.

Select an option

Save bsurial/e057a685b231e193ee6e394c1ce429b0 to your computer and use it in GitHub Desktop.

Revisions

  1. bsurial revised this gist Oct 25, 2020. 1 changed file with 21 additions and 4 deletions.
    25 changes: 21 additions & 4 deletions non-equi-join.R
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    library(tidyverse)
    library(data.table)

    # Create some data
    # Create som edata
    dates <- tibble(id = 1,
    labdate = lubridate::dmy(c("01-01-2010", "01-02-2010", "01-03-2010")))

    @@ -21,13 +21,12 @@ dates
    #> 2: 1 2010-02-01
    #> 3: 1 2010-03-01


    fupdate
    #> id fupdate LB UB
    #> 1: 1 2010-02-02 2010-01-19 2010-02-16


    # In data.table joins, always take the right-hand-side first (this is what you match to the other df)
    # Here, I want to keep all rows of "dates"
    fupdate[dates,
    on = .(id, LB <= labdate, UB >= labdate),
    # Always be explicit with i.xx (inner data frame) and x.xx (outer data frame)
    @@ -42,5 +41,23 @@ fupdate[dates,
    #> 2 1 2010-02-01 2010-02-02
    #> 3 1 2010-03-01 NA

    # See here for more information on data.tables joins: https://franknarf1.github.io/r-tutorial/_book/tables.html#dt-joins



    # We can also use the fuzzyjoin package, but it is much slower
    library(fuzzyjoin)

    dates %>%
    fuzzy_left_join(fupdate,
    by = c("id" = "id",
    "labdate" = "LB",
    "labdate" = "UB"),
    # match_fun's should be quoted as `` and not ""
    match_fun = list(`==`, `>=`, `<=`)) %>%
    select(id = id.x, labdate, fupdate)

    # See here for more information: https://franknarf1.github.io/r-tutorial/_book/tables.html#dt-joins
    #> id labdate fupdate
    #> 1 1 2010-01-01 <NA>
    #> 2 1 2010-02-01 2010-02-02
    #> 3 1 2010-03-01 <NA>
  2. bsurial revised this gist Oct 25, 2020. 1 changed file with 17 additions and 4 deletions.
    21 changes: 17 additions & 4 deletions non-equi-join.R
    Original file line number Diff line number Diff line change
    @@ -1,12 +1,12 @@
    library(tidyverse)
    library(data.table)


    # Create some data
    dates <- tibble(id = 1,
    labdate = lubridate::dmy(c("01-01-2010", "01-02-2010", "01-03-2010")))
    labdate = lubridate::dmy(c("01-01-2010", "01-02-2010", "01-03-2010")))

    fupdate <- tibble(id = 1,
    fupdate = lubridate::dmy("02-02-2010")) %>%
    fupdate = lubridate::dmy("02-02-2010")) %>%
    #set window of +/- 14 days
    mutate(LB = fupdate - 14,
    UB = fupdate + 14)
    @@ -15,19 +15,32 @@ fupdate <- tibble(id = 1,
    # For a left-join with data.table setDT
    setDT(dates);setDT(fupdate)

    dates
    #> id labdate
    #> 1: 1 2010-01-01
    #> 2: 1 2010-02-01
    #> 3: 1 2010-03-01


    fupdate
    #> id fupdate LB UB
    #> 1: 1 2010-02-02 2010-01-19 2010-02-16

    # In data.table joins, always take the right-hand-side first (this is what you match to the other df)
    # Here, I want to keep all rows of "dates"
    fupdate[dates,
    on = .(id, LB <= labdate, UB >= labdate),
    # Always be explicit with i.xx (inner data frame) and x.xx (outer data frame)
    .(id, labdate = i.labdate, fupdate = x.fupdate)] %>%
    # set back to tibble
    as_tibble

    #> # A tibble: 3 x 3
    #> id labdate fupdate
    #> <dbl> <date> <date>
    #> 1 1 2010-01-01 NA
    #> 2 1 2010-02-01 2010-02-02
    #> 3 1 2010-03-01 NA


    # See here for more information: https://franknarf1.github.io/r-tutorial/_book/tables.html#dt-joins
  3. bsurial revised this gist Oct 25, 2020. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion non-equi-join.R
    Original file line number Diff line number Diff line change
    @@ -28,4 +28,6 @@ fupdate[dates,
    #> <dbl> <date> <date>
    #> 1 1 2010-01-01 NA
    #> 2 1 2010-02-01 2010-02-02
    #> 3 1 2010-03-01 NA
    #> 3 1 2010-03-01 NA

    # See here for more information: https://franknarf1.github.io/r-tutorial/_book/tables.html#dt-joins
  4. bsurial created this gist Oct 25, 2020.
    31 changes: 31 additions & 0 deletions non-equi-join.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    library(tidyverse)
    library(data.table)


    dates <- tibble(id = 1,
    labdate = lubridate::dmy(c("01-01-2010", "01-02-2010", "01-03-2010")))

    fupdate <- tibble(id = 1,
    fupdate = lubridate::dmy("02-02-2010")) %>%
    #set window of +/- 14 days
    mutate(LB = fupdate - 14,
    UB = fupdate + 14)


    # For a left-join with data.table setDT
    setDT(dates);setDT(fupdate)

    # In data.table joins, always take the right-hand-side first (this is what you match to the other df)
    fupdate[dates,
    on = .(id, LB <= labdate, UB >= labdate),
    # Always be explicit with i.xx (inner data frame) and x.xx (outer data frame)
    .(id, labdate = i.labdate, fupdate = x.fupdate)] %>%
    # set back to tibble
    as_tibble

    #> # A tibble: 3 x 3
    #> id labdate fupdate
    #> <dbl> <date> <date>
    #> 1 1 2010-01-01 NA
    #> 2 1 2010-02-01 2010-02-02
    #> 3 1 2010-03-01 NA