""" Investigate anomaly with pandas and CrateDB, re. storing `DATE` types that naturally do not use time zones. https://github.com/crate/sqlalchemy-cratedb/issues/216 Setup: Please install dependency packages enumerated below. Usage: Please toggle database connection URI per `dburi = this or that`. Invoke: Just type `uv run pandas_cratedb_date_type.py`. docker run --rm -it --name=cratedb --publish=4200:4200 --publish=5432:5432 --env=CRATE_HEAP_SIZE=2g crate/crate:nightly -Cdiscovery.type=single-node docker run --rm -it --name=postgresql --publish=5433:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" postgres:17 postgres -c log_statement=all """ # /// script # requires-python = ">=3.12" # dependencies = [ # "psycopg2-binary", # "sqlalchemy-cratedb", # ] # /// import sqlalchemy as sa import pandas as pd data = { "date_1": ["2020-01-01", "2021-01-01", "2022-01-01", "2023-01-01", "2027-12-30"], "date_2": ["2020-09-24", "2020-10-24", "2020-11-24", "2020-12-24", "2027-09-24"], } def main(): print("\ndataframe before") df_data = pd.DataFrame.from_dict(data, dtype="datetime64[ns]") print(df_data.dtypes) print(df_data.sort_values(by="date_1").reset_index(drop=True)) dburi = "crate://" #dburi = "postgresql://postgres@localhost:5433/" engine = sa.create_engine(dburi, echo=False) conn = engine.connect() df_data.to_sql( "test_date", conn, if_exists="replace", index=False, ) if dburi.startswith("crate"): conn.exec_driver_sql("REFRESH TABLE test_date;") #df_load = pd.read_sql_table("test_date", conn, parse_dates={"date_1": "date", "date_2": "date"}) df_load = pd.read_sql_table("test_date", conn) print("\ndataframe after") #df_load = df_load.sort_values(by="date_1").reset_index(drop=True) print(df_load.dtypes) print(df_load) if __name__ == "__main__": main()