# Dataset: https://www.kaggle.com/datasets/wilmerarltstrmberg/recipe-dataset-over-2m
import duckdb

# Two on-disk databases are loaded from the same CSV:
#   db1 keeps `source` and `site` with whatever types the CSV reader infers,
#   db2 stores those two columns as ENUM types instead.
db1 = duckdb.connect('db1.duck.db')
db2 = duckdb.connect('db2.duck.db')

# db1: load the selected columns; NER is cast to a list of strings.
db1.sql("""
    CREATE OR REPLACE TABLE recipes AS
        FROM read_csv_auto('recipes_data.csv', header=True)
        SELECT
            title,
            source,
            cast(NER AS varchar[]) AS NER,
            site,
            link;
""")

# db2 is a persistent file, so a previous run may already have created the
# table and the ENUM types. CREATE TYPE has no "OR REPLACE" counterpart here,
# so drop leftovers first to make the script re-runnable. The table goes
# first because its columns use the types.
db2.sql("DROP TABLE IF EXISTS recipes;")
db2.sql("DROP TYPE IF EXISTS Source;")
db2.sql("DROP TYPE IF EXISTS Site;")

# ENUM with the two fixed values used for the `source` column.
db2.sql("""
    CREATE TYPE Source AS ENUM (
        'Gathered',
        'Recipes1M'
    );
""")

# ENUM whose members are taken from the non-NULL `site` values in the CSV.
db2.sql("""
    CREATE TYPE Site AS ENUM (
        FROM read_csv_auto('recipes_data.csv', header=True)
        SELECT site
        WHERE site IS NOT NULL
    );
""")

# db2: same columns as db1, but `source` and `site` are cast to the ENUM types.
db2.sql("""
    create or replace table recipes AS
        FROM read_csv_auto('recipes_data.csv', header=True)
        SELECT
            title,
            CAST(source AS Source) AS source,
            cast(NER AS varchar[]) AS NER,
            CAST(site AS Site) AS site,
            link;
""")