Skip to content

Instantly share code, notes, and snippets.

@janko
Last active November 1, 2020 17:26
Show Gist options
  • Select an option

  • Save janko/99e2196b7af178dc01474b51cd72b5de to your computer and use it in GitHub Desktop.

Select an option

Save janko/99e2196b7af178dc01474b51cd72b5de to your computer and use it in GitHub Desktop.

Revisions

  1. janko revised this gist Nov 1, 2020. 2 changed files with 10 additions and 9 deletions.
    14 changes: 7 additions & 7 deletions results.md
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    | Strategy | Execution time | Memory allocated |
    | :------ | -------------: | ----------------: |
    | individual inserts | 35.7 seconds | 478 MB |
    | individual prepared inserts | 23.8 seconds | 634 MB |
    | bulk insert | 8.4 seconds | 162 MB |
    | batched bulk insert | 7.9 seconds | 158 MB |
    | database insert | 2.0 seconds | 0 MB |
    | Strategy | Execution time | Objects allocated | Memory allocated |
    | :------ | -------------: | ----------------: | ----------------: |
    | individual inserts | 35.7 seconds | 610k | 478 MB |
    | individual prepared inserts | 23.8 seconds | 480k | 634 MB |
    | bulk insert | 8.4 seconds | 21k | 162 MB |
    | batched bulk insert | 7.9 seconds | 21k | 158 MB |
    | database insert | 2.0 seconds | 94 | 0 MB |
    5 changes: 3 additions & 2 deletions script.rb
    Original file line number Diff line number Diff line change
    @@ -61,7 +61,8 @@
    end
    end
    allocated_mb = memory_report.total_allocated_memsize.to_f / (1024 * 1024)
    puts "#{name} (%.1fs, allocated: %dMB)" % [execution_time, allocated_mb]
    allocations = memory_report.total_allocated
    puts "#{name} (%.1fs, memory: %dMB, objects: %d)" % [execution_time, allocated_mb, allocations]
    end

    measure.("individual inserts") do
    @@ -146,4 +147,4 @@
    drop_column :event # this table will only hold approval logs now
    drop_column :target # this was specific to publication logs
    set_column_not_null :user_id # only publication logs didn't have user id set
    end
    end
  2. janko revised this gist Nov 1, 2020. 1 changed file with 7 additions and 0 deletions.
    7 changes: 7 additions & 0 deletions results.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,7 @@
    | Strategy | Execution time | Memory allocated |
    | :------ | -------------: | ----------------: |
    | individual inserts | 35.7 seconds | 478 MB |
    | individual prepared inserts | 23.8 seconds | 634 MB |
    | bulk insert | 8.4 seconds | 162 MB |
    | batched bulk insert | 7.9 seconds | 158 MB |
    | database insert | 2.0 seconds | 0 MB |
  3. janko revised this gist Nov 1, 2020. No changes.
  4. janko created this gist Nov 1, 2020.
    6 changes: 6 additions & 0 deletions Gemfile
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,6 @@
    source "https://rubygems.org"

    gem "sequel"
    gem "pg"
    gem "sequel_pg"
    gem "memory_profiler"
    21 changes: 21 additions & 0 deletions Gemfile.lock
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,21 @@
    GEM
    remote: https://rubygems.org/
    specs:
    memory_profiler (0.9.14)
    pg (1.2.3)
    sequel (5.37.0)
    sequel_pg (1.14.0)
    pg (>= 0.18.0, != 1.2.0)
    sequel (>= 4.38.0)

    PLATFORMS
    ruby

    DEPENDENCIES
    memory_profiler
    pg
    sequel
    sequel_pg

    BUNDLED WITH
    2.1.4
    149 changes: 149 additions & 0 deletions script.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,149 @@
    # Benchmark setup: compares strategies for moving rows from activity_logs
    # into a new publication_logs table using Sequel on PostgreSQL.
    require "bundler/setup"
    require "sequel"
    require "benchmark"
    require "memory_profiler"

    # Create a throwaway database for this run.
    # NOTE(review): assumes `createdb`/`dropdb` are on PATH and the local
    # PostgreSQL server accepts connections for the current user — confirm.
    system "createdb dataset-insert"
    DB = Sequel.postgres("dataset-insert")

    # Tear the scratch database down when the script exits, success or failure.
    at_exit do
    DB.disconnect
    system "dropdb dataset-insert"
    end

    # Minimal referenced tables so the foreign keys below can be satisfied.
    DB.create_table(:playlists) { primary_key :id }
    DB.create_table(:users) { primary_key :id }

    # Source table: holds both approval and publication events mixed together.
    DB.create_table :activity_logs do
    primary_key :id
    foreign_key :playlist_id, :playlists, null: false
    foreign_key :user_id, :users
    String :event, null: false
    String :action, null: false
    String :message
    String :target
    Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP
    end

    DB[:playlists].multi_insert [{ id: 1 }, { id: 2 }]
    DB[:users].multi_insert [{ id: 1 }]

    # 100,000 approval logs
    # (Array.new with a literal shares one hash across all elements — fine here,
    # since multi_insert only reads the values.)
    DB[:activity_logs].multi_insert Array.new(100_000, {
    playlist_id: 1,
    user_id: 1,
    event: "approval",
    action: "approve",
    message: "Looks good!",
    })

    # 100,000 publication logs (no user_id — publication events have none)
    DB[:activity_logs].multi_insert Array.new(100_000, {
    playlist_id: 2,
    event: "publication",
    action: "published",
    target: "Video Wall 1",
    })

    # Destination table: the publication rows will be migrated here.
    DB.create_table :publication_logs do
    primary_key :id
    foreign_key :playlist_id, :playlists, null: false
    String :action, null: false
    String :target
    Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP
    end

    # Runs the given block inside a rolled-back transaction, measuring wall-clock
    # time and memory usage, then prints a one-line summary.
    #
    # name  - String label for the strategy being benchmarked.
    # block - the work to measure; run via DB.transaction(rollback: :always) so
    #         every strategy starts from the same untouched data.
    measure = -> (name, &block) do
      execution_time = nil
      memory_report = MemoryProfiler.report do
        execution_time = Benchmark.realtime do
          # rollback: :always keeps runs independent — nothing is committed
          DB.transaction(rollback: :always, &block)
        end
      end
      allocated_mb = memory_report.total_allocated_memsize.to_f / (1024 * 1024)
      # Report the allocated object count as well as bytes (per the gist's
      # latest revision) — object churn and byte volume tell different stories.
      allocations = memory_report.total_allocated
      puts "#{name} (%.1fs, memory: %dMB, objects: %d)" % [execution_time, allocated_mb, allocations]
    end

    measure.("individual inserts") do
      # the rows to migrate out of activity_logs
      publication_logs = DB[:activity_logs].where(event: "publication")

      # naive strategy: one INSERT statement per row
      publication_logs.each do |row|
        DB[:publication_logs].insert(
          playlist_id: row[:playlist_id],
          action:      row[:action],
          target:      row[:target],
          created_at:  row[:created_at],
        )
      end

      # remove the migrated rows from the source table
      publication_logs.delete
    end

    measure.("individual prepared inserts") do
      # the rows to migrate out of activity_logs
      publication_logs = DB[:activity_logs].where(event: "publication")

      # prepare the INSERT once; only the bind values travel on each call
      prepared_insert = DB[:publication_logs].prepare :insert, :insert_publication_data,
        playlist_id: :$playlist_id, action: :$action, target: :$target, created_at: :$created_at

      # execute the prepared statement once per row
      publication_logs.each do |row|
        prepared_insert.call(
          playlist_id: row[:playlist_id],
          action:      row[:action],
          target:      row[:target],
          created_at:  row[:created_at],
        )
      end

      # remove the migrated rows from the source table
      publication_logs.delete
    end

    measure.("bulk insert") do
    # select records we want to move
    publication_logs = DB[:activity_logs].where(event: "publication")

    # insert all records at once with a single multi-row INSERT
    # (all 100k rows are materialized in Ruby first via the map)
    DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
    publication_logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) }

    # delete records from the old table
    publication_logs.delete
    end

    measure.("batched bulk insert") do
    # select records we want to move
    publication_logs = DB[:activity_logs].where(event: "publication")

    # insert records in batches of 1000, one multi-row INSERT per batch,
    # so only 1000 rows are held in Ruby memory at a time
    publication_logs.each_slice(1000) do |logs|
    DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
    logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) }
    end

    # delete records from the old table
    publication_logs.delete
    end

    measure.("database insert") do
    # select records we want to move
    publication_logs = DB[:activity_logs].where(event: "publication")

    # passing a dataset (not an array) to #import makes Sequel issue a single
    # INSERT ... SELECT — the rows never leave the database, so no Ruby
    # objects are allocated for them
    DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
    publication_logs.select(:playlist_id, :action, :target, :created_at)

    # delete records from the old table
    publication_logs.delete
    end

    # Every measured run above was rolled back; now perform the migration for
    # real: drop the publication rows and reshape activity_logs so it only
    # holds approval logs.
    DB[:activity_logs].where(event: "publication").delete

    DB.alter_table :activity_logs do
    drop_column :event # this table will only hold approval logs now
    drop_column :target # this was specific to publication logs
    set_column_not_null :user_id # only publication logs didn't have user id set
    end