Skip to content

Instantly share code, notes, and snippets.

@cboettig
Last active June 9, 2025 00:17
Show Gist options
  • Save cboettig/c90a91ef66ee54b04d5a6d0f9bc6ac09 to your computer and use it in GitHub Desktop.
Save cboettig/c90a91ef66ee54b04d5a6d0f9bc6ac09 to your computer and use it in GitHub Desktop.

Revisions

  1. cboettig revised this gist Jun 9, 2025. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion duckdb-filtering-join.py
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,5 @@
    # same thing in pure duckdb / SQL

    %%time
    import duckdb as ddb

    con = ddb.connect('test4.db', config=dict(max_memory='500MB')) # Create a DuckDB connection
  2. cboettig created this gist Jun 9, 2025.
    40 changes: 40 additions & 0 deletions duckdb-filtering-join.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    # same thing in pure duckdb / SQL

    %%time
    import duckdb as ddb

    con = ddb.connect('test4.db', config=dict(max_memory='500MB')) # Create a DuckDB connection
    con.install_extension("spatial")
    con.load_extension("spatial")

    con.sql(f"""
    CREATE OR REPLACE VIEW fires AS (
    SELECT CAST(fires.ALARM_DATE AS DATE) AS alarm_date, CAST(fires.CONT_DATE AS DATE) AS control_date, fires.geometry as boundary
    FROM 'https://minio.carlboettiger.info/public-fire/calfire-2023.parquet' AS fires
    WHERE fires.YEAR_ > 2018
    )
    """)


    con.sql(f"""
    CREATE OR REPLACE VIEW nps AS (
    SELECT geometry, MAKE_DATE(2020, 1, 1) AS "start_date", MAKE_DATE(2023, 12, 31) AS "end_date"
    FROM (
    SELECT ST_TRANSFORM(nps.SHAPE, 'EPSG:3857', 'EPSG:4326', TRUE) AS "geometry"
    FROM st_read("/vsicurl/https://minio.carlboettiger.info/public-biodiversity/NPS.gdb") AS nps
    WHERE nps.PARKNAME = 'Joshua Tree'
    )
    )
    """)

    x = con.sql(f"""
    SELECT *
    FROM fires
    INNER JOIN nps ON (
    ST_WITHIN(fires.boundary, nps.geometry)
    AND fires.alarm_date BETWEEN nps.start_date AND nps.end_date
    )
    """)


    x.df()
    31 changes: 31 additions & 0 deletions ibis-geospatial-filtering-joins.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    import ibis
    from ibis import _
    con = ibis.duckdb.connect(extensions=["spatial"])

    jtree = (con
    .read_geo("/vsicurl/https://minio.carlboettiger.info/public-biodiversity/NPS.gdb")
    .filter(_.PARKNAME == "Joshua Tree")
    .mutate(geometry = _.SHAPE.convert('EPSG:3857', 'EPSG:4326'))
    .mutate(start_date = ibis.date('2020-01-01'),
    end_date = ibis.date('2023-12-31')) # arbitrarily add some date ranges
    )

    fires = (con
    .read_parquet("https://minio.carlboettiger.info/public-fire/calfire-2023.parquet")
    .mutate(alarm_date = _.ALARM_DATE.cast('Date'),
    control_date = _.CONT_DATE.cast('Date'))
    .select('alarm_date', 'control_date', 'geometry')
    )

    jtree_fires = (fires
    .join(jtree,
    [
    fires.geometry.within(jtree.geometry),
    fires.alarm_date.between(jtree.start_date, jtree.end_date)
    ])
    )


    #ibis.to_sql(jtree_fires)

    jtree_fires.execute()