Skip to content

Instantly share code, notes, and snippets.

@dkourilov
Created April 13, 2016 15:56
Show Gist options
  • Save dkourilov/6580a9e1a1d443f1925f39953066b266 to your computer and use it in GitHub Desktop.
Save dkourilov/6580a9e1a1d443f1925f39953066b266 to your computer and use it in GitHub Desktop.

Revisions

  1. dkourilov created this gist Apr 13, 2016.
    143 changes: 143 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,143 @@
    denis-dev-01 ~/ny_taxi $ ll
    total 305M
    drwxrwxr-x 2 denis 4.0K Apr 13 15:50 .
    drwxr-xr-x 7 denis 4.0K Apr 13 15:50 ..
    -rw-rw-r-- 1 denis 1.1K Apr 13 15:41 main.inc.cpp
    -rw-rw-r-- 1 denis 318 Apr 13 15:39 Makefile
    -rwxrwxr-x 1 denis 343K Apr 13 15:50 test
    -rw-rw-r-- 1 denis 12K Apr 13 15:50 test.cpp
    -rw-rw-r-- 1 denis 305M Apr 13 14:22 test.csv
    -rw-rw-r-- 1 denis 179 Apr 13 14:21 test.py
    -rw-rw-r-- 1 denis 250 Apr 13 15:50 test.sql
    denis-dev-01 ~/ny_taxi $ cat test.py
    import random

    for x in range(0,10000000):
    id = x
    type = 1 if x % 2 else 2
    price = random.randint(10, 100) + 0.1
    print "%d,%d,%f,33.33,44.44" % (id, type, price)

    denis-dev-01 ~/ny_taxi $ cat main.inc.cpp
    #include <chrono>
    #include <functional>

    struct scoped_timer
    {
    std::chrono::system_clock::time_point start;
    std::function<void(int)> callback;

    scoped_timer(std::function<void(int)> callback) :
    start(std::chrono::system_clock::now()),
    callback(callback)
    {}

    ~scoped_timer()
    {
    auto finish = std::chrono::system_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
    finish - start).count();
    callback(duration);
    }
    };

    int main(int argc, char* argv[])
    {
    Test q(argc, argv);
    {
    std::cout << "-- started init" << std::endl;
    scoped_timer t([] (int ms) {
    std::cout << "-- finished init: " << ms << "ms" << std::endl;
    });
    q.init();
    }
    {
    std::cout << "-- started run" << std::endl;
    scoped_timer t([] (int ms) {
    std::cout << "-- finished run: " << ms << "ms" << std::endl;
    });
    q.run();
    }
    auto snap = q.get_snapshot();
    DBT_SERIALIZATION_NVP_OF_PTR(std::cout, snap);
    return 0;
    }

    denis-dev-01 ~/ny_taxi $ cat test.sql
    CREATE TABLE TAXI(
    id int,
    type int,
    price double,
    x double,
    y double
    )
    FROM FILE 'test.csv'
    LINE DELIMITED
    CSV (fields := ',');

    SELECT sum(price),type FROM TAXI r1 GROUP BY r1.type;

    SELECT COUNT(price) FROM TAXI WHERE type=2;
    denis-dev-01 ~/ny_taxi $ cat Makefile
    all:
    ../dbtoaster/bin/dbtoaster \
    test.sql -o test.cpp -l cpp -O3; \
    \
    echo '#include "main.inc.cpp"' >> test.cpp ;\
    \
    g++ test.cpp -std=c++11 -O3 \
    -o test \
    -I../dbtoaster/lib/dbt_c++ \
    -I/usr/include/boost \
    -L../dbtoaster/lib/dbt_c++ \
    -ldbtoaster

    clean:
    rm -rf test.cpp test.sql

    $(PHONY): all

    denis-dev-01 ~/ny_taxi $ /usr/bin/time -v ./test -v
    csv params: fields: ,
    csv params: schema: long,long,double,double,double
    reading from test.csv with 1 adaptors
    -- started init
    -- finished init: 23495ms
    -- started run
    -- finished run: 0ms
    <snap>
    <QUERY_1___SQL_SUM_AGGREGATE_1>
    <count>2</count>
    <item>
    <R1_TYPE>2</R1_TYPE>
    <__av>275623169.982614</__av>
    </item>
    <item>
    <R1_TYPE>1</R1_TYPE>
    <__av>275497093.982556</__av>
    </item>
    </QUERY_1___SQL_SUM_AGGREGATE_1>
    <QUERY_2___SQL_COUNT_AGGREGATE_2>5000000</QUERY_2___SQL_COUNT_AGGREGATE_2>
    </snap> Command being timed: "./test -v"
    User time (seconds): 23.20
    System time (seconds): 2.81
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:26.02
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 6919200
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 1190377
    Voluntary context switches: 13
    Involuntary context switches: 109
    Swaps: 0
    File system inputs: 0
    File system outputs: 0
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0