Created
March 31, 2025 18:10
-
-
Save amotl/2196433103659335a7c341d7c39485c6 to your computer and use it in GitHub Desktop.
Revisions
-
amotl created this gist
Mar 31, 2025 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,57 @@ """ Investigate anomaly with pandas and CrateDB, re. storing `DATE` types that naturally do not use time zones. https://github.com/crate/sqlalchemy-cratedb/issues/216 Setup: Please install dependency packages enumerated below. Usage: Please toggle database connection URI per `dburi = this or that`. Invoke: Just type `uv run pandas_cratedb_date_type.py`. docker run --rm -it --name=cratedb --publish=4200:4200 --publish=5432:5432 --env=CRATE_HEAP_SIZE=2g crate/crate:nightly -Cdiscovery.type=single-node docker run --rm -it --name=postgresql --publish=5433:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" postgres:17 postgres -c log_statement=all """ # /// script # requires-python = ">=3.12" # dependencies = [ # "psycopg2-binary", # "sqlalchemy-cratedb", # ] # /// import sqlalchemy as sa import pandas as pd data = { "date_1": ["2020-01-01", "2021-01-01", "2022-01-01", "2023-01-01", "2027-12-30"], "date_2": ["2020-09-24", "2020-10-24", "2020-11-24", "2020-12-24", "2027-09-24"], } def main(): print("\ndataframe before") df_data = pd.DataFrame.from_dict(data, dtype="datetime64[ns]") print(df_data.dtypes) print(df_data.sort_values(by="date_1").reset_index(drop=True)) dburi = "crate://" #dburi = "postgresql://postgres@localhost:5433/" engine = sa.create_engine(dburi, echo=False) conn = engine.connect() df_data.to_sql( "test_date", conn, if_exists="replace", index=False, ) if dburi.startswith("crate"): conn.exec_driver_sql("REFRESH TABLE test_date;") #df_load = pd.read_sql_table("test_date", conn, parse_dates={"date_1": "date", "date_2": "date"}) df_load = pd.read_sql_table("test_date", conn) print("\ndataframe after") #df_load = df_load.sort_values(by="date_1").reset_index(drop=True) print(df_load.dtypes) print(df_load) if __name__ == "__main__": main()