Skip to content

Instantly share code, notes, and snippets.

@jyates
Last active June 2, 2016 07:20
Show Gist options
  • Select an option

  • Save jyates/f11eb44a44af715b483859f497b9ea89 to your computer and use it in GitHub Desktop.

Select an option

Save jyates/f11eb44a44af715b483859f497b9ea89 to your computer and use it in GitHub Desktop.

Revisions

  1. jyates renamed this gist Jun 2, 2016. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. jyates revised this gist Jun 2, 2016. 4 changed files with 35 additions and 22 deletions.
    14 changes: 13 additions & 1 deletion logical plan → expanded table planning
    Original file line number Diff line number Diff line change
    @@ -8,4 +8,16 @@ LogicalProject(*=[$0]): rowcount = 1500.0, cumulative cost = {3200.0 rows, 1702.
    DrillProjectRel(*=[$0]): rowcount = 1.0, cumulative cost = {4.0 rows, 22.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 55
    DrillJoinRel(condition=[=($0, $1)], joinType=[inner]): rowcount = 1.0, cumulative cost = {4.0 rows, 22.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 60
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 42
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41

    2016-06-02 00:02:00 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.1 rows, 10.1 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 624
    00-01 Project(*=[$0]) : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 623
    00-02 MergeJoin(condition=[=($0, $1)], joinType=[inner]) : rowType = RecordType(ANY id, ANY id0): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 622
    00-04 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 617
    00-06 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 616
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 615
    00-03 Project(id0=[$0]) : rowType = RecordType(ANY id0): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 621
    00-05 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 620
    00-07 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 619
    00-09 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 618
    11 changes: 0 additions & 11 deletions physical plan
    Original file line number Diff line number Diff line change
    @@ -1,11 +0,0 @@
    2016-06-02 00:02:00 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.1 rows, 10.1 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 624
    00-01 Project(*=[$0]) : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 623
    00-02 MergeJoin(condition=[=($0, $1)], joinType=[inner]) : rowType = RecordType(ANY id, ANY id0): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 622
    00-04 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 617
    00-06 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 616
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 615
    00-03 Project(id0=[$0]) : rowType = RecordType(ANY id0): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 621
    00-05 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 620
    00-07 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 619
    00-09 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 618
    10 changes: 0 additions & 10 deletions raw-sql-physical-plan
    Original file line number Diff line number Diff line change
    @@ -1,10 +0,0 @@
    2016-06-02 00:03:50 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.1 rows, 20.1 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 325
    00-01 ProjectAllowDup(*=[$0], *0=[$1]) : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 324
    00-02 Project(T0¦¦*=[$0], T1¦¦*=[$2]) : rowType = RecordType(ANY T0¦¦*, ANY T1¦¦*): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 323
    00-03 HashJoin(condition=[=($1, $3)], joinType=[full]) : rowType = RecordType(ANY T0¦¦*, ANY companykey, ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 322
    00-05 Project(T0¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T0¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 318
    00-07 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 317
    00-04 Project(T1¦¦*=[$0], companykey0=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 321
    00-06 Project(T1¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 320
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 319
    22 changes: 22 additions & 0 deletions raw-sql-plan
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,22 @@
    2016-06-02 00:09:29 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:Directory Prune Planning (6ms):
    LogicalProject(*=[$0], *0=[$2]): rowcount = 1500.0, cumulative cost = {3200.0 rows, 3202.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 19
    LogicalJoin(condition=[=($1, $3)], joinType=[full]): rowcount = 1500.0, cumulative cost = {1700.0 rows, 202.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 17
    EnumerableTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-1-0728a834-81bd-48f1-9f2e-dc3594d46eda.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 5
    EnumerableTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-2-e0b457ae-798a-4fe7-8774-4d7286d18822.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 6

    2016-06-02 00:09:29 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:LOPT Join Planning (14ms):
    DrillProjectRel(*=[$0], *0=[$2]): rowcount = 1.0, cumulative cost = {4.0 rows, 20020.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 54
    DrillJoinRel(condition=[=($1, $3)], joinType=[full]): rowcount = 1.0, cumulative cost = {4.0 rows, 20020.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 59
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-1-0728a834-81bd-48f1-9f2e-dc3594d46eda.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-1-0728a834-81bd-48f1-9f2e-dc3594d46eda.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-1-0728a834-81bd-48f1-9f2e-dc3594d46eda.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 10000.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-2-e0b457ae-798a-4fe7-8774-4d7286d18822.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-2-e0b457ae-798a-4fe7-8774-4d7286d18822.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit6653229530728832749/drill/test-2-e0b457ae-798a-4fe7-8774-4d7286d18822.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 10000.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 40

    2016-06-02 00:03:50 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.1 rows, 20.1 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 325
    00-01 ProjectAllowDup(*=[$0], *0=[$1]) : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 324
    00-02 Project(T0¦¦*=[$0], T1¦¦*=[$2]) : rowType = RecordType(ANY T0¦¦*, ANY T1¦¦*): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 323
    00-03 HashJoin(condition=[=($1, $3)], joinType=[full]) : rowType = RecordType(ANY T0¦¦*, ANY companykey, ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 322
    00-05 Project(T0¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T0¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 318
    00-07 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 317
    00-04 Project(T1¦¦*=[$0], companykey0=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 321
    00-06 Project(T1¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 320
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 319
  3. jyates revised this gist Jun 2, 2016. 1 changed file with 10 additions and 0 deletions.
    10 changes: 10 additions & 0 deletions raw-sql-physical-plan
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    2016-06-02 00:03:50 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.1 rows, 20.1 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 325
    00-01 ProjectAllowDup(*=[$0], *0=[$1]) : rowType = RecordType(ANY *, ANY *0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 324
    00-02 Project(T0¦¦*=[$0], T1¦¦*=[$2]) : rowType = RecordType(ANY T0¦¦*, ANY T1¦¦*): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 323
    00-03 HashJoin(condition=[=($1, $3)], joinType=[full]) : rowType = RecordType(ANY T0¦¦*, ANY companykey, ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {2.0 rows, 20.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 322
    00-05 Project(T0¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T0¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 318
    00-07 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-1-bbebfbe4-baf0-4dc2-ae79-fae61c0da3fc.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 317
    00-04 Project(T1¦¦*=[$0], companykey0=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey0): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 321
    00-06 Project(T1¦¦*=[$0], companykey=[$1]) : rowType = RecordType(ANY T1¦¦*, ANY companykey): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 320
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit7587719243224120904/drill/test-2-7ce87e1f-9709-4417-88a9-6475e3a2517e.json]]]) : rowType = (DrillRecordRow[*, companykey]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 319
  4. jyates revised this gist Jun 2, 2016. 4 changed files with 26 additions and 93 deletions.
    32 changes: 0 additions & 32 deletions RelOptUtil.java snippet
    Original file line number Diff line number Diff line change
    @@ -1,32 +0,0 @@
    private static void splitJoinCondition(...)
    ....
    RexInputRef leftField;
    RexInputRef rightField;
    if ((op0.getIndex() < leftFieldCount)
    && (op1.getIndex() >= leftFieldCount)) {
    // Arguments were of form 'op0 = op1'
    leftField = op0;
    rightField = op1;
    } else if (
    (op1.getIndex() < leftFieldCount)
    && (op0.getIndex() >= leftFieldCount)) {
    // Arguments were of form 'op1 = op0'
    leftField = op1;
    rightField = op0;
    } else {
    // Jesse - We fall through into here because
    // leftFieldCount = 1 (["id"]), so
    // $0 < leftFieldCount, but also $0 (right side) < leftFieldCount.
    // But it is an equijoin!
    nonEquiList.add(condition);
    return;
    }

    // Jesse - This is logically what we want to happen, but the field indexes for the
    // right side need to be _higher_ than those of the left side. We get this when building
    // from the SqlNode because they are tracked through the Blackboard, but when we build
    // the condition it doesn't work out.
    leftKeys.add(leftField.getIndex());
    rightKeys.add(rightField.getIndex() - leftFieldCount);
    return;
    }
    10 changes: 5 additions & 5 deletions drill-to-rel-table-example.java
    Original file line number Diff line number Diff line change
    @@ -89,17 +89,17 @@ private RexNode composeCondition(String... fieldNames) {
    for (String fieldName : fieldNames) {
    conditions.add(
    builder.call(SqlStdOperatorTable.EQUALS,
    field(table1, fieldName),
    field(table2, fieldName)));
    field(table1, fieldName, 0),
    field(table2, fieldName, 1)));
    }
    return RexUtil.composeConjunction(cluster.getRexBuilder(), conditions, false);
    }

    private RexNode field(RelNode table1, String fieldName) {
    RelDataType row = table1.getRowType();
    private RexNode field(RelNode table, String fieldName, int offset) {
    RelDataType row = table.getRowType();
    RelDataTypeField field = row.getField(fieldName, true, false);
    int index = field.getIndex();
    return cluster.getRexBuilder().makeInputRef(row, index);
    return cluster.getRexBuilder().makeInputRef(row, index + offset);
    }
    }
    }
    14 changes: 10 additions & 4 deletions logical plan
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,11 @@
    2016-06-01 23:00:05 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:Directory Prune Planning (7ms):
    2016-06-02 00:02:00 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:Directory Prune Planning (7ms):
    LogicalProject(*=[$0]): rowcount = 1500.0, cumulative cost = {3200.0 rows, 1702.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 16
    LogicalJoin(condition=[=($0, $0)], joinType=[full]): rowcount = 1500.0, cumulative cost = {1700.0 rows, 202.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 14
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 3
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 2
    LogicalJoin(condition=[=($0, $1)], joinType=[inner]): rowcount = 1500.0, cumulative cost = {1700.0 rows, 202.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 14
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 3
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 2

    2016-06-02 00:02:00 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:LOPT Join Planning (17ms):
    DrillProjectRel(*=[$0]): rowcount = 1.0, cumulative cost = {4.0 rows, 22.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 55
    DrillJoinRel(condition=[=($0, $1)], joinType=[inner]): rowcount = 1.0, cumulative cost = {4.0 rows, 22.0 cpu, 0.0 io, 0.0 network, 17.6 memory}, id = 60
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 42
    DrillScanRel(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41
    63 changes: 11 additions & 52 deletions physical plan
    Original file line number Diff line number Diff line change
    @@ -1,52 +1,11 @@
    AbstractConverter(subset=[rel#77:Subset#8.PHYSICAL.SINGLETON([]).[]], convention=[PHYSICAL], DrillDistributionTraitDef=[SINGLETON([])], sort=[[]]): rowcount = 1.0, cumulative cost = {inf}, id = 79
    DrillScreenRel(subset=[rel#76:Subset#8.LOGICAL.ANY([]).[]]): rowcount = 1.0, cumulative cost = {0.1 rows, 0.1 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 75
    DrillProjectRel(subset=[rel#74:Subset#7.LOGICAL.ANY([]).[]], *=[$0]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 73
    DrillJoinRel(subset=[rel#72:Subset#6.LOGICAL.ANY([]).[]], condition=[=($0, $0)], joinType=[full]): rowcount = 1.0, cumulative cost = {1.0 rows, 200000.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}, id = 71
    DrillScanRel(subset=[rel#69:Subset#4.LOGICAL.ANY([]).[]], table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 42
    DrillScanRel(subset=[rel#70:Subset#5.LOGICAL.ANY([]).[]], table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41
    Sets:
    Set#4, type: (DrillRecordRow[companykey])
    rel#69:Subset#4.LOGICAL.ANY([]).[], best=rel#42, importance=0.6561
    rel#42:DrillScanRel.LOGICAL.ANY([]).[](table=[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json],groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]), rowcount=1.0, cumulative cost={1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}
    rel#99:AbstractConverter.LOGICAL.ANY([]).[](input=rel#98:Subset#4.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#98:Subset#4.PHYSICAL.SINGLETON([]).[], best=rel#97, importance=0.5904900000000001
    rel#100:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#69:Subset#4.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#97:ScanPrel.PHYSICAL.SINGLETON([]).[](groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]), rowcount=1.0, cumulative cost={tiny}
    Set#5, type: (DrillRecordRow[companykey])
    rel#70:Subset#5.LOGICAL.ANY([]).[], best=rel#41, importance=0.6561
    rel#41:DrillScanRel.LOGICAL.ANY([]).[](table=[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json],groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]), rowcount=1.0, cumulative cost={1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}
    rel#103:AbstractConverter.LOGICAL.ANY([]).[](input=rel#102:Subset#5.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#102:Subset#5.PHYSICAL.SINGLETON([]).[], best=rel#101, importance=0.5904900000000001
    rel#104:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#70:Subset#5.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#101:ScanPrel.PHYSICAL.SINGLETON([]).[](groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]), rowcount=1.0, cumulative cost={tiny}
    Set#6, type: RecordType(ANY companykey, ANY companykey0)
    rel#72:Subset#6.LOGICAL.ANY([]).[], best=rel#71, importance=0.7290000000000001
    rel#71:DrillJoinRel.LOGICAL.ANY([]).[](left=rel#69:Subset#4.LOGICAL.ANY([]).[],right=rel#70:Subset#5.LOGICAL.ANY([]).[],condition==($0, $0),joinType=full), rowcount=1.0, cumulative cost={3.0 rows, 200002.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#86:AbstractConverter.LOGICAL.ANY([]).[](input=rel#85:Subset#6.PHYSICAL.ANY([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#85:Subset#6.PHYSICAL.ANY([]).[], best=null, importance=0.6561
    rel#87:AbstractConverter.PHYSICAL.ANY([]).[](input=rel#72:Subset#6.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    Set#7, type: RecordType(ANY *)
    rel#74:Subset#7.LOGICAL.ANY([]).[], best=rel#73, importance=0.81
    rel#73:DrillProjectRel.LOGICAL.ANY([]).[](input=rel#72:Subset#6.LOGICAL.ANY([]).[],*=$0), rowcount=1.0, cumulative cost={4.0 rows, 200003.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#81:AbstractConverter.LOGICAL.ANY([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#90:AbstractConverter.LOGICAL.ANY([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#80:Subset#7.PHYSICAL.SINGLETON([]).[], best=null, importance=0.7290000000000001
    rel#82:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#91:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#94:UnionExchangePrel.PHYSICAL.SINGLETON([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[]), rowcount=1.0, cumulative cost={inf}
    rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[], best=null, importance=0.6561
    rel#92:AbstractConverter.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=RANDOM_DISTRIBUTED([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#93:AbstractConverter.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[],convention=PHYSICAL,DrillDistributionTraitDef=RANDOM_DISTRIBUTED([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#88:ProjectPrel.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#85:Subset#6.PHYSICAL.ANY([]).[],*=$0), rowcount=1.0, cumulative cost={inf}
    Set#8, type: RecordType(ANY *)
    rel#76:Subset#8.LOGICAL.ANY([]).[], best=rel#75, importance=0.9
    rel#75:DrillScreenRel.LOGICAL.ANY([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[]), rowcount=1.0, cumulative cost={4.1 rows, 200003.1 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#78:AbstractConverter.LOGICAL.ANY([]).[](input=rel#77:Subset#8.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#77:Subset#8.PHYSICAL.SINGLETON([]).[], best=null, importance=1.0
    rel#79:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#76:Subset#8.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#83:ScreenPrel.PHYSICAL.SINGLETON([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[]), rowcount=1.0, cumulative cost={inf}


    2016-06-01 23:00:05 INFO o.a.d.e.p.s.DrillSqlWorker[552] - User Error Occurred
    org.apache.drill.common.exceptions.UserException: UNSUPPORTED_OPERATION ERROR: This query cannot be planned possibly due to either a cartesian join or an inequality join

    2016-06-02 00:02:00 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[151] - Drill Physical:
    00-00 Screen : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.1 rows, 10.1 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 624
    00-01 Project(*=[$0]) : rowType = RecordType(ANY *): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 623
    00-02 MergeJoin(condition=[=($0, $1)], joinType=[inner]) : rowType = RecordType(ANY id, ANY id0): rowcount = 1.0, cumulative cost = {6.0 rows, 10.0 cpu, 0.0 io, 0.0 network, 16.0 memory}, id = 622
    00-04 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 617
    00-06 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 616
    00-08 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-2-513a7a13-0950-42c6-8265-765472451ff4.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 615
    00-03 Project(id0=[$0]) : rowType = RecordType(ANY id0): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 621
    00-05 SelectionVectorRemover : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {2.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 620
    00-07 Sort(sort0=[$0], dir0=[ASC]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {1.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 8.0 memory}, id = 619
    00-09 Scan(groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit4857731164319877248/drill/test-1-f49716af-48c4-4338-9a0c-5155b6a0548a.json]]]) : rowType = (DrillRecordRow[id]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 618
  5. jyates revised this gist Jun 2, 2016. 1 changed file with 14 additions and 1 deletion.
    15 changes: 14 additions & 1 deletion RelOptUtil.java snippet
    Original file line number Diff line number Diff line change
    @@ -14,6 +14,19 @@ private static void splitJoinCondition(...)
    leftField = op1;
    rightField = op0;
    } else {
    // Jesse - We fall through into here because
    // leftFieldCount = 1 (["id"]), so
    // $0 < leftFieldCount, but also $0 (right side) < leftFieldCount.
    // But it is an equijoin!
    nonEquiList.add(condition);
    return;
    }
    }

    // Jesse - This is logically what we want to happen, but it requires the field indexes
    // for the right side to be _higher_ than those of the left side. We get this when building
    // from the SqlNode because they are tracked through the Blackboard, but when we build
    // the condition ourselves it doesn't work out.
    leftKeys.add(leftField.getIndex());
    rightKeys.add(rightField.getIndex() - leftFieldCount);
    return;
    }
  6. jyates revised this gist Jun 2, 2016. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions drill-to-rel-table-example.java
    Original file line number Diff line number Diff line change
    @@ -62,8 +62,6 @@ public LogicalScanBuilder(RelOptTable.ToRelContext context, RelOptTable relOptTa
    public LogicalScanBuilder scan(String... schemaAndTable) {
    RelOptTable table =
    relOptTable.getRelOptSchema().getTableForMember(newArrayList(schemaAndTable));
    // ensures that the "*" operator is added to the row type
    table.getRowType().getFieldCount();
    LogicalTableScan scan =
    new LogicalTableScan(cluster, cluster.traitSetOf(Convention.NONE), table);
    builder.push(scan);
  7. jyates revised this gist Jun 2, 2016. 1 changed file with 19 additions and 0 deletions.
    19 changes: 19 additions & 0 deletions RelOptUtil.java snippet
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,19 @@
    private static void splitJoinCondition(...)
    ....
    RexInputRef leftField;
    RexInputRef rightField;
    if ((op0.getIndex() < leftFieldCount)
    && (op1.getIndex() >= leftFieldCount)) {
    // Arguments were of form 'op0 = op1'
    leftField = op0;
    rightField = op1;
    } else if (
    (op1.getIndex() < leftFieldCount)
    && (op0.getIndex() >= leftFieldCount)) {
    // Arguments were of form 'op1 = op0'
    leftField = op1;
    rightField = op0;
    } else {
    nonEquiList.add(condition);
    return;
    }
  8. jyates revised this gist Jun 2, 2016. 1 changed file with 52 additions and 0 deletions.
    52 changes: 52 additions & 0 deletions physical plan
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,52 @@
    AbstractConverter(subset=[rel#77:Subset#8.PHYSICAL.SINGLETON([]).[]], convention=[PHYSICAL], DrillDistributionTraitDef=[SINGLETON([])], sort=[[]]): rowcount = 1.0, cumulative cost = {inf}, id = 79
    DrillScreenRel(subset=[rel#76:Subset#8.LOGICAL.ANY([]).[]]): rowcount = 1.0, cumulative cost = {0.1 rows, 0.1 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 75
    DrillProjectRel(subset=[rel#74:Subset#7.LOGICAL.ANY([]).[]], *=[$0]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 73
    DrillJoinRel(subset=[rel#72:Subset#6.LOGICAL.ANY([]).[]], condition=[=($0, $0)], joinType=[full]): rowcount = 1.0, cumulative cost = {1.0 rows, 200000.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}, id = 71
    DrillScanRel(subset=[rel#69:Subset#4.LOGICAL.ANY([]).[]], table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 42
    DrillScanRel(subset=[rel#70:Subset#5.LOGICAL.ANY([]).[]], table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]], groupscan=[EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 41
    Sets:
    Set#4, type: (DrillRecordRow[companykey])
    rel#69:Subset#4.LOGICAL.ANY([]).[], best=rel#42, importance=0.6561
    rel#42:DrillScanRel.LOGICAL.ANY([]).[](table=[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json],groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]), rowcount=1.0, cumulative cost={1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}
    rel#99:AbstractConverter.LOGICAL.ANY([]).[](input=rel#98:Subset#4.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#98:Subset#4.PHYSICAL.SINGLETON([]).[], best=rel#97, importance=0.5904900000000001
    rel#100:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#69:Subset#4.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#97:ScanPrel.PHYSICAL.SINGLETON([]).[](groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]), rowcount=1.0, cumulative cost={tiny}
    Set#5, type: (DrillRecordRow[companykey])
    rel#70:Subset#5.LOGICAL.ANY([]).[], best=rel#41, importance=0.6561
    rel#41:DrillScanRel.LOGICAL.ANY([]).[](table=[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json],groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]), rowcount=1.0, cumulative cost={1.0 rows, 1.0 cpu, 0.0 io, 0.0 network, 0.0 memory}
    rel#103:AbstractConverter.LOGICAL.ANY([]).[](input=rel#102:Subset#5.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#102:Subset#5.PHYSICAL.SINGLETON([]).[], best=rel#101, importance=0.5904900000000001
    rel#104:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#70:Subset#5.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#101:ScanPrel.PHYSICAL.SINGLETON([]).[](groupscan=EasyGroupScan [selectionRoot=file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json, numFiles=1, columns=[`*`], files=[file:/var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]), rowcount=1.0, cumulative cost={tiny}
    Set#6, type: RecordType(ANY companykey, ANY companykey0)
    rel#72:Subset#6.LOGICAL.ANY([]).[], best=rel#71, importance=0.7290000000000001
    rel#71:DrillJoinRel.LOGICAL.ANY([]).[](left=rel#69:Subset#4.LOGICAL.ANY([]).[],right=rel#70:Subset#5.LOGICAL.ANY([]).[],condition==($0, $0),joinType=full), rowcount=1.0, cumulative cost={3.0 rows, 200002.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#86:AbstractConverter.LOGICAL.ANY([]).[](input=rel#85:Subset#6.PHYSICAL.ANY([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#85:Subset#6.PHYSICAL.ANY([]).[], best=null, importance=0.6561
    rel#87:AbstractConverter.PHYSICAL.ANY([]).[](input=rel#72:Subset#6.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    Set#7, type: RecordType(ANY *)
    rel#74:Subset#7.LOGICAL.ANY([]).[], best=rel#73, importance=0.81
    rel#73:DrillProjectRel.LOGICAL.ANY([]).[](input=rel#72:Subset#6.LOGICAL.ANY([]).[],*=$0), rowcount=1.0, cumulative cost={4.0 rows, 200003.0 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#81:AbstractConverter.LOGICAL.ANY([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#90:AbstractConverter.LOGICAL.ANY([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#80:Subset#7.PHYSICAL.SINGLETON([]).[], best=null, importance=0.7290000000000001
    rel#82:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#91:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#94:UnionExchangePrel.PHYSICAL.SINGLETON([]).[](input=rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[]), rowcount=1.0, cumulative cost={inf}
    rel#89:Subset#7.PHYSICAL.RANDOM_DISTRIBUTED([]).[], best=null, importance=0.6561
    rel#92:AbstractConverter.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=RANDOM_DISTRIBUTED([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#93:AbstractConverter.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[],convention=PHYSICAL,DrillDistributionTraitDef=RANDOM_DISTRIBUTED([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#88:ProjectPrel.PHYSICAL.RANDOM_DISTRIBUTED([]).[](input=rel#85:Subset#6.PHYSICAL.ANY([]).[],*=$0), rowcount=1.0, cumulative cost={inf}
    Set#8, type: RecordType(ANY *)
    rel#76:Subset#8.LOGICAL.ANY([]).[], best=rel#75, importance=0.9
    rel#75:DrillScreenRel.LOGICAL.ANY([]).[](input=rel#74:Subset#7.LOGICAL.ANY([]).[]), rowcount=1.0, cumulative cost={4.1 rows, 200003.1 cpu, 0.0 io, 0.0 network, 176000.0 memory}
    rel#78:AbstractConverter.LOGICAL.ANY([]).[](input=rel#77:Subset#8.PHYSICAL.SINGLETON([]).[],convention=LOGICAL,DrillDistributionTraitDef=ANY([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#77:Subset#8.PHYSICAL.SINGLETON([]).[], best=null, importance=1.0
    rel#79:AbstractConverter.PHYSICAL.SINGLETON([]).[](input=rel#76:Subset#8.LOGICAL.ANY([]).[],convention=PHYSICAL,DrillDistributionTraitDef=SINGLETON([]),sort=[]), rowcount=1.0, cumulative cost={inf}
    rel#83:ScreenPrel.PHYSICAL.SINGLETON([]).[](input=rel#80:Subset#7.PHYSICAL.SINGLETON([]).[]), rowcount=1.0, cumulative cost={inf}


    2016-06-01 23:00:05 INFO o.a.d.e.p.s.DrillSqlWorker[552] - User Error Occurred
    org.apache.drill.common.exceptions.UserException: UNSUPPORTED_OPERATION ERROR: This query cannot be planned possibly due to either a cartesian join or an inequality join

  9. jyates revised this gist Jun 2, 2016. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions logical plan
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,5 @@
    2016-06-01 23:00:05 DEBUG o.a.d.e.p.s.h.DefaultSqlHandler[140] - HEP_BOTTOM_UP:Directory Prune Planning (7ms):
    LogicalProject(*=[$0]): rowcount = 1500.0, cumulative cost = {3200.0 rows, 1702.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 16
    LogicalJoin(condition=[=($0, $0)], joinType=[full]): rowcount = 1500.0, cumulative cost = {1700.0 rows, 202.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 14
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-2-68a27b3c-0239-441a-8e7a-ffacee78690d.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 3
    LogicalTableScan(table=[[dfs, /var/folders/43/tsp1ph8n5b96whkk0j_bkl540000gn/T/junit9083020172631443709/drill/test-1-0bb21e3c-261c-4b15-b7eb-91035fe184e2.json]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 2
  10. jyates created this gist Jun 2, 2016.
    107 changes: 107 additions & 0 deletions drill-to-rel-table-example.java
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,107 @@
    package io.fineo.read.drill.exec.store.rel;

    import org.apache.calcite.plan.Convention;
    import org.apache.calcite.plan.RelOptCluster;
    import org.apache.calcite.plan.RelOptTable;
    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.rel.core.JoinRelType;
    import org.apache.calcite.rel.logical.LogicalTableScan;
    import org.apache.calcite.rel.type.RelDataType;
    import org.apache.calcite.rel.type.RelDataTypeField;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.rex.RexUtil;
    import org.apache.calcite.schema.TranslatableTable;
    import org.apache.calcite.sql.fun.SqlStdOperatorTable;
    import org.apache.calcite.tools.RelBuilder;
    import org.apache.drill.exec.planner.logical.DynamicDrillTable;

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import static com.google.common.collect.Lists.newArrayList;

    /**
     * Table that translates into a logical plan joining several underlying {@code dfs} JSON
     * tables on their shared {@code id} column with a FULL join.
     */
    public class MyTable extends DynamicDrillTable implements TranslatableTable {

      private final SchemaStore schema;

      public MyTable(MyPlugin plugin,
          String storageEngineName, String userName, Object selection, SchemaStore store) {
        super(plugin, storageEngineName, userName, selection);
        this.schema = store;
      }

      /**
       * Expands this table into one scan per source file, full-joined together on {@code id}.
       */
      @Override
      public RelNode toRel(RelOptTable.ToRelContext context, RelOptTable relOptTable) {
        LogicalScanBuilder builder = new LogicalScanBuilder(context, relOptTable);
        for (String sourcePath : newArrayList("/drill/table1.json", "/drill/table2.json")) {
          builder.scan("dfs", sourcePath);
        }
        return builder.build();
      }

      /**
       * Thin wrapper around {@link RelBuilder} that pushes table scans and then joins all of the
       * pushed scans together on the common key.
       */
      private static class LogicalScanBuilder {

        private final RelBuilder builder;
        private final RelOptTable relOptTable;
        private final RelOptCluster cluster;
        // number of scans pushed so far; build() emits (scanCount - 1) joins
        private int scanCount = 0;

        public LogicalScanBuilder(RelOptTable.ToRelContext context, RelOptTable relOptTable) {
          this.cluster = context.getCluster();
          this.relOptTable = relOptTable;
          this.builder = RelBuilder.proto(cluster.getPlanner().getContext())
              .create(cluster, relOptTable.getRelOptSchema());
        }

        /**
         * Work around for {@link RelBuilder#scan(String)} not taking multiple String parts as in
         * Calcite 1.8. Once Drill bumps up, we can replace with just using that
         *
         * @param schemaAndTable qualified name parts of the table to scan, e.g. schema then path
         * @return this builder, for chaining
         */
        public LogicalScanBuilder scan(String... schemaAndTable) {
          RelOptTable table =
              relOptTable.getRelOptSchema().getTableForMember(newArrayList(schemaAndTable));
          // ensures that the "*" operator is added to the row type
          table.getRowType().getFieldCount();
          LogicalTableScan scan =
              new LogicalTableScan(cluster, cluster.traitSetOf(Convention.NONE), table);
          builder.push(scan);
          scanCount++;
          return this;
        }

        /**
         * Joins every pushed scan on the common key and returns the resulting plan.
         */
        public RelNode build() {
          // join all the sub-tables together on the common keys
          for (int i = 0; i < scanCount - 1; i++) {
            // ideally do:
            // builder.join(JoinRelType.FULL, "id")
            // but seem to have to make our own version:
            builder.join(JoinRelType.FULL, composeCondition("id"));
          }

          return builder.build();
        }

        /**
         * Builds the equi-join condition for the two relations currently on top of the builder
         * stack.
         *
         * <p>A join condition is evaluated against the concatenation of the left row followed by
         * the right row, so references to right-hand fields must be shifted by the number of
         * left-hand fields. Without that shift both sides resolve to the same ordinal (for
         * example {@code =($0, $0)}), and {@code RelOptUtil.splitJoinCondition} treats the
         * predicate as a non-equi condition, which makes the query unplannable.
         *
         * @param fieldNames names of the columns that must be equal on both sides
         * @return conjunction of {@code left.field = right.field} over all given names
         */
        private RexNode composeCondition(String... fieldNames) {
          // RelBuilder.join takes the top of the stack as the right input and the entry
          // beneath it as the left input.
          RelNode right = builder.peek(0);
          RelNode left = builder.peek(1);
          int leftFieldCount = left.getRowType().getFieldCount();
          // build the rex node for the two tables
          final List<RexNode> conditions = new ArrayList<>();
          for (String fieldName : fieldNames) {
            conditions.add(
                builder.call(SqlStdOperatorTable.EQUALS,
                    field(left, fieldName, 0),
                    field(right, fieldName, leftFieldCount)));
          }
          return RexUtil.composeConjunction(cluster.getRexBuilder(), conditions, false);
        }

        /**
         * Creates an input reference to the named field of {@code table}.
         *
         * @param table relation that owns the field
         * @param fieldName name of the field to look up (case-sensitive)
         * @param offset position of {@code table}'s first field within the row the reference is
         *     resolved against (0 for the left join input, left field count for the right input)
         * @return reference typed as the field itself and indexed at {@code offset + ordinal}
         * @throws IllegalArgumentException if {@code table} has no field with that name
         */
        private RexNode field(RelNode table, String fieldName, int offset) {
          RelDataType row = table.getRowType();
          RelDataTypeField field = row.getField(fieldName, true, false);
          if (field == null) {
            throw new IllegalArgumentException(
                "No field '" + fieldName + "' in row type " + row);
          }
          // makeInputRef expects the type of the referenced field, not of the whole row
          return cluster.getRexBuilder().makeInputRef(field.getType(), offset + field.getIndex());
        }
      }
    }