-- Querying Delta Lake, 643715 rows -- Created, optimized and vacuumed with Databricks -- Partitioned by year and month (filepath 1 and 2) -- Spark parquet repartition(1) means 1 file per partition -- Runtime: -- SSMS: 01:31 -- Synapse Workspace top 5000 rows: 03:15 -- Synapse Workspace all rows: 25:29 -- Hard-coding the partition names makes no discernible difference SELECT g_l_account_no_, posting_date document_type, amount, R.filepath(1), R.filepath(2), R.filepath(3) FROM OPENROWSET( BULK 'lri-data-lake/data/delta/g_l_entry.parquet/posting_year=*/posting_month=*/*.parquet', DATA_SOURCE = 'ADLSStorage', FORMAT = 'PARQUET' ) WITH ( [timestamp] BIGINT, [g_l_account_no_] INT, [posting_date] DATETIME2, [document_type] INT, [document_no_] VARCHAR(40), [description] VARCHAR(100), [bal_account_no_] VARCHAR(40), [amount] DECIMAL(38,20), [global_dimension_1_code] VARCHAR(40), [global_dimension_2_code] VARCHAR(40), [user_id] VARCHAR(40), [source_code] VARCHAR(20), [system-created_entry] INT, [prior-year_entry] INT, [job_no_] VARCHAR(40), [quantity] DECIMAL(38,20), [vat_amount] DECIMAL(38,20), [business_unit_code] VARCHAR(20), [journal_batch_name] VARCHAR(20), [reason_code] VARCHAR(20), [gen_posting_type] INT, [gen_bus_posting_group] VARCHAR(20), [gen_prod_posting_group] VARCHAR(20), [bal_account_type] INT, [transaction_no_] INT, [debit_amount] DECIMAL(38,20), [credit_amount] DECIMAL(38,20), [document_date] DATETIME2, [external_document_no_] VARCHAR(40), [source_type] INT, [source_no_] VARCHAR(40), [no_series] VARCHAR(20), [tax_area_code] VARCHAR(40), [tax_liable] INT, [tax_group_code] VARCHAR(20), [use_tax] INT, [vat_bus_posting_group] VARCHAR(20), [vat_prod_posting_group] VARCHAR(20), [additional-currency_amount] DECIMAL(38,20), [add_-currency_debit_amount] DECIMAL(38,20), [add_-currency_credit_amount] DECIMAL(38,20), [close_income_statement_dim_id] INT, [ic_partner_code] VARCHAR(40), [reversed] INT, [reversed_by_entry_no_] INT, [reversed_entry_no_] INT, [prod_order_no_] VARCHAR(40), [fa_entry_type] INT, [fa_entry_no_] INT, [value_entry_no_] INT, [gd3code] VARCHAR(40), [gd4code] VARCHAR(40), [gd5code] VARCHAR(40), [gd6code] VARCHAR(40), [gd7code] VARCHAR(40), [gd8code] VARCHAR(40), [gd9code] VARCHAR(40), [gd10code] VARCHAR(40), [gd11code] VARCHAR(40), [gd12code] VARCHAR(40), [gd13code] VARCHAR(40), [gd14code] VARCHAR(40), [gd15code] VARCHAR(40), [gd16code] VARCHAR(40), [gd17code] VARCHAR(40), [gd18code] VARCHAR(40), [gd19code] VARCHAR(40), [gd20code] VARCHAR(40), [licensee_type] INT, [licensee_no_] VARCHAR(40), [entry_no_] INT, [reconciled] INT, [reconciled_per] DATETIME2, [reconciled_id] INT, [royalty_posting_type_code] VARCHAR(20), [ic_partn_irrelevant_for_cons_] INT, [rights_sales_no_] VARCHAR(40), [rights_owner_no_] VARCHAR(40), [rights_buyer_no_] VARCHAR(40), [creation_date] DATETIME2, [reposting] INT ) [R] WHERE R.filepath(1) = 2020 AND R.filepath(2) = 7