Skip to content

Instantly share code, notes, and snippets.

@amotl
Created March 31, 2025 18:10
Show Gist options
  • Select an option

  • Save amotl/2196433103659335a7c341d7c39485c6 to your computer and use it in GitHub Desktop.

Select an option

Save amotl/2196433103659335a7c341d7c39485c6 to your computer and use it in GitHub Desktop.

Revisions

  1. amotl created this gist Mar 31, 2025.
    57 changes: 57 additions & 0 deletions pandas_cratedb_date_type.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,57 @@
    """
    Investigate anomaly with pandas and CrateDB, re. storing `DATE` types that naturally do not use time zones.
    https://github.com/crate/sqlalchemy-cratedb/issues/216
    Setup: Please install dependency packages enumerated below.
    Usage: Please toggle database connection URI per `dburi = this or that`.
    Invoke: Just type `uv run pandas_cratedb_date_type.py`.
    docker run --rm -it --name=cratedb --publish=4200:4200 --publish=5432:5432 --env=CRATE_HEAP_SIZE=2g crate/crate:nightly -Cdiscovery.type=single-node
    docker run --rm -it --name=postgresql --publish=5433:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" postgres:17 postgres -c log_statement=all
    """
    # /// script
    # requires-python = ">=3.12"
    # dependencies = [
    # "psycopg2-binary",
    # "sqlalchemy-cratedb",
    # ]
    # ///
    import sqlalchemy as sa
    import pandas as pd

    data = {
    "date_1": ["2020-01-01", "2021-01-01", "2022-01-01", "2023-01-01", "2027-12-30"],
    "date_2": ["2020-09-24", "2020-10-24", "2020-11-24", "2020-12-24", "2027-09-24"],
    }

    def main():
    print("\ndataframe before")
    df_data = pd.DataFrame.from_dict(data, dtype="datetime64[ns]")
    print(df_data.dtypes)
    print(df_data.sort_values(by="date_1").reset_index(drop=True))

    dburi = "crate://"
    #dburi = "postgresql://postgres@localhost:5433/"

    engine = sa.create_engine(dburi, echo=False)
    conn = engine.connect()

    df_data.to_sql(
    "test_date",
    conn,
    if_exists="replace",
    index=False,
    )
    if dburi.startswith("crate"):
    conn.exec_driver_sql("REFRESH TABLE test_date;")
    #df_load = pd.read_sql_table("test_date", conn, parse_dates={"date_1": "date", "date_2": "date"})
    df_load = pd.read_sql_table("test_date", conn)

    print("\ndataframe after")
    #df_load = df_load.sort_values(by="date_1").reset_index(drop=True)
    print(df_load.dtypes)
    print(df_load)


    if __name__ == "__main__":
    main()