Skip to content

Instantly share code, notes, and snippets.

@mehd-io
Last active June 12, 2025 23:36
Show Gist options
  • Select an option

  • Save mehd-io/9afab092e807a4097864b09e7e9835e9 to your computer and use it in GitHub Desktop.

Select an option

Save mehd-io/9afab092e807a4097864b09e7e9835e9 to your computer and use it in GitHub Desktop.

Revisions

  1. mehd-io renamed this gist Jun 5, 2025. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. mehd-io created this gist Jun 5, 2025.
    66 changes: 66 additions & 0 deletions sql
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    -- Don't forget to first sign-in through sso using aws sso login
    CREATE OR REPLACE SECRET secret(
    TYPE s3,
    PROVIDER credential_chain
    );

    -- first let's create a Ducklake catalog
    CREATE SECRET(
    TYPE postgres,
    HOST '',
    PORT 6543,
    DATABASE postgres,
    USER '',
    PASSWORD ''
    );

    INSTALL ducklake;
    INSTALL postgres;

    from duckdb_extensions();

    ATTACH 'ducklake:postgres:dbname=postgres' AS mehdio_ducklake(DATA_PATH 's3://tmp-mehdio/ducklake/');

    use mehdio_ducklake;

    -- how the data looks like
    DESCRIBE TABLE 's3://us-prd-motherduck-open-datasets/who_ambient_air_quality/csv/who_ambient_air_quality_database_version_2024.csv';
    FROM 's3://us-prd-motherduck-open-datasets/who_ambient_air_quality/csv/who_ambient_air_quality_database_version_2024.csv';

    -- creating my first ducklake table
    CREATE TABLE who_ambient_air_quality_2024 AS
    SELECT *
    FROM
    's3://us-prd-motherduck-open-datasets/who_ambient_air_quality/csv/who_ambient_air_quality_database_version_2024.csv';

    -- show the snapshots
    from mehdio_ducklake.snapshots();

    -- show the parquet files
    FROM
    glob('s3://tmp-mehdio/ducklake/*.parquet');


    ALTER TABLE who_ambient_air_quality_2024
    ADD COLUMN iso2 VARCHAR;

    UPDATE who_ambient_air_quality_2024
    SET iso2 = 'DE'
    WHERE iso3 = 'DEU';

    FROM who_ambient_air_quality_2024 where iso2 = 'DE';


    FROM
    glob('s3://tmp-mehdio/ducklake/*.parquet');

    FROM 's3://tmp-mehdio/ducklake/ducklake-019730a7-55b6-7096-a543-a0eaa871499d-delete.parquet';

    ALTER TABLE who_ambient_air_quality_2024
    DROP COLUMN iso3;

    FROM
    mehdio_ducklake.snapshots();

    SELECT iso2 FROM who_ambient_air_quality_2024 AT (VERSION => 3)
    where iso2 is not null;