require "bundler/setup" require "sequel" require "benchmark" require "memory_profiler" system "createdb dataset-insert" DB = Sequel.postgres("dataset-insert") at_exit do DB.disconnect system "dropdb dataset-insert" end DB.create_table(:playlists) { primary_key :id } DB.create_table(:users) { primary_key :id } DB.create_table :activity_logs do primary_key :id foreign_key :playlist_id, :playlists, null: false foreign_key :user_id, :users String :event, null: false String :action, null: false String :message String :target Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP end DB[:playlists].multi_insert [{ id: 1 }, { id: 2 }] DB[:users].multi_insert [{ id: 1 }] # 100,000 approval logs DB[:activity_logs].multi_insert Array.new(100_000, { playlist_id: 1, user_id: 1, event: "approval", action: "approve", message: "Looks good!", }) # 100,000 publication logs DB[:activity_logs].multi_insert Array.new(100_000, { playlist_id: 2, event: "publication", action: "published", target: "Video Wall 1", }) DB.create_table :publication_logs do primary_key :id foreign_key :playlist_id, :playlists, null: false String :action, null: false String :target Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP end measure = -> (name, &block) do execution_time = nil memory_report = MemoryProfiler.report do execution_time = Benchmark.realtime do DB.transaction(rollback: :always, &block) end end allocated_mb = memory_report.total_allocated_memsize.to_f / (1024 * 1024) allocations = memory_report.total_allocated puts "#{name} (%.1fs, memory: %dMB, objects: %d)" % [execution_time, allocated_mb, allocations] end measure.("individual inserts") do # select records we want to move publication_logs = DB[:activity_logs].where(event: "publication") # insert each record individually into the new table publication_logs.each do |log| DB[:publication_logs].insert( playlist_id: log[:playlist_id], action: log[:action], target: log[:target], created_at: log[:created_at], ) end # delete records from the old table publication_logs.delete end measure.("individual prepared inserts") do # select records we want to move publication_logs = DB[:activity_logs].where(event: "publication") prepared_insert = DB[:publication_logs].prepare :insert, :insert_publication_data, playlist_id: :$playlist_id, action: :$action, target: :$target, created_at: :$created_at # insert each record individually into the new table publication_logs.each do |log| prepared_insert.call( playlist_id: log[:playlist_id], action: log[:action], target: log[:target], created_at: log[:created_at], ) end # delete records from the old table publication_logs.delete end measure.("bulk insert") do # select records we want to move publication_logs = DB[:activity_logs].where(event: "publication") # insert each record individually into the new table DB[:publication_logs].import [:playlist_id, :action, :target, :created_at], publication_logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) } # delete records from the old table publication_logs.delete end measure.("batched bulk insert") do # select records we want to move publication_logs = DB[:activity_logs].where(event: "publication") # insert each record individually into the new table publication_logs.each_slice(1000) do |logs| DB[:publication_logs].import [:playlist_id, :action, :target, :created_at], logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) } end # delete records from the old table publication_logs.delete end measure.("database insert") do # select records we want to move publication_logs = DB[:activity_logs].where(event: "publication") DB[:publication_logs].import [:playlist_id, :action, :target, :created_at], publication_logs.select(:playlist_id, :action, :target, :created_at) # delete records from the old table publication_logs.delete end DB[:activity_logs].where(event: "publication").delete DB.alter_table :activity_logs do drop_column :event # this table will only hold approval logs now drop_column :target # this was specific to publication logs set_column_not_null :user_id # only publication logs didn't have user id set end