Skip to content

Instantly share code, notes, and snippets.

@w00lf
Forked from boblail/sql_rocket
Created December 12, 2013 19:40
Show Gist options
  • Save w00lf/7934093 to your computer and use it in GitHub Desktop.
Save w00lf/7934093 to your computer and use it in GitHub Desktop.

Revisions

  1. @boblail boblail revised this gist Aug 6, 2013. 1 changed file with 227 additions and 85 deletions.
    312 changes: 227 additions & 85 deletions sql_rocket
    Original file line number Diff line number Diff line change
    @@ -6,13 +6,19 @@ require 'benchmark'
    require 'nokogiri'
    require 'progressbar'
    require 'tempfile'
    # require 'pry'

    class SqlRocket

    class SqlRocket < Nokogiri::XML::SAX::Document

    FIELDS_WHERE_NIL_IS_PROHIBITED = %w{created_at updated_at}





    # SqlRocket command API

    class << self


    # Exports a MySQL database, scheme and data, to an XML file.
    def dump(database_name, output_file=nil)
    $stderr.print "Dumping #{database_name}...\n"
    @@ -52,28 +58,31 @@ class SqlRocket
    xml_file.unlink
    end
    end

    end








    # SqlRocket convert logic

    def initialize(io)
    @io = io
    @reader = Nokogiri::XML::Reader(io)
    @progress = ProgressBar.new("Converting", io.size)
    @parser = Nokogiri::XML::SAX::Parser.new(self)
    # @parser.replace_entities = true
    @schema_by_table = {}
    @inserts = []
    @mode = :continue
    end

    attr_reader :io, :reader


    attr_reader :io, :parser, :progress, :output
    attr_reader :body, :table_name, :mode, :current_schema, :values, :row_id

    def convert(output)
    progress = ProgressBar.new("Converting", io.size)

    reader.each do |node|
    figure_it_out(node, output)
    progress.set io.pos
    end
    @start = Time.now
    @output = output
    parser.parse(io)

    progress.finish

    @@ -82,71 +91,146 @@ class SqlRocket
    output.puts "SELECT setval('#{table}_id_seq', (SELECT MAX(id)+1 FROM #{table}), FALSE);"
    end

    $stderr.puts "\e[32mFinished in \e[1m#{format_duration(Time.now - @start)}\e[0m"

    true

    rescue
    log_error $!.message
    puts "", body, ""
    raise
    end

    def figure_it_out(node, output)
    if node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT
    case node.name
    when 'table_structure'
    @table_name = nil
    @mode = :continue





    # Mode

    def read_structure?
    mode == :read_structure
    end

    def read_data?
    mode == :read_data
    end

    def reset_mode!
    @rows = 0
    @row_id = nil
    @table_name = nil
    @mode = :continue
    end

    def read_structure!
    @mode = :read_structure
    end

    def read_data!
    @mode = :read_data
    end





    # SAX events

    def start_document
    reset_mode!
    end

    def start_element(name, attributes={})
    attributes = Hash[attributes] unless Hash === attributes
    @body = nil # we don't care about the body of this element

    case name
    when 'table_structure'
    read_structure!
    @table_name = attributes['name']
    @current_schema = @schema_by_table[table_name] = {}

    when 'table_data'
    @table_name = node.attribute('name')
    output.print ");" if @rows > 0
    $stderr.puts "\e[1m\e[32m#{@rows}\e[0m\e[32m rows inserted to \e[1m\e[32m#{@table_name}\e[0m"

    @rows = 0
    @table_name = nil
    @mode = :continue
    when 'table_data'
    read_data!
    @start_ts = Time.now
    @start_cpu = Process.times
    @table_name = attributes['name']
    @current_schema = @schema_by_table[table_name] # readonly
    @rows = 0

    when 'row'
    @rows += 1
    if @rows == 1
    output.print "INSERT INTO \"#{@table_name}\" (#{@current_schema.keys.join(",")}) VALUES ("
    else
    output.print ","
    end
    output.print "(#{@values.join(",")})"
    when 'row'
    @row_id = nil
    @values = []

    when 'field'
    add_field_to_table_structure(attributes) if read_structure?
    if read_data?
    @body = "" # we _do_ care about the body of this element
    @attributes = attributes # save for use in end_element
    end
    end

    if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    case node.name
    when 'table_structure'
    @mode = :read_structure
    @table_name = node.attribute('name')
    @current_schema = @schema_by_table[@table_name] = {}
    end

    def end_element(name)
    progress.set io.pos

    case name
    when 'table_structure'
    reset_mode!

    when 'table_data'
    @mode = :read_data
    @table_name = node.attribute('name')
    @current_schema = @schema_by_table[@table_name] # readonly
    @rows = 0

    when 'row'
    @values = []

    when 'field'
    add_field_to_table_structure(node) if @mode == :read_structure
    add_value_to_table_row(node) if @mode == :read_data
    when 'table_data'
    output.print ");" if @rows > 0
    log_time_to_read_table
    reset_mode!

    when 'row'
    @rows += 1
    if @rows == 1
    output.print "INSERT INTO \"#{table_name}\" (#{current_schema.keys.join(",")}) VALUES ("
    else
    output.print ","
    end
    output.print "(#{values.join(",")})"

    when 'field'
    if read_data?
    @row_id = body.chomp if @attributes['name'] == 'id'
    add_value_to_table_row(@attributes, @body)
    end

    end
    end

    def add_field_to_table_structure(node)
    @current_schema[node.attribute('Field')] = node.attribute('Type')
    def error(string)
    log_error string
    end

    def add_value_to_table_row(node)
    type = @current_schema[node.attribute('name')]
    @values << output_postgres_value(node, type)

    def warning(string)
    log_warning string
    end

    def output_postgres_value(node, type)
    field = node.attribute('name')
    value = read_value_of(node, type)

    def characters(string)
    @body << string unless @body.nil?
    end





    # Element transformation

    def add_field_to_table_structure(attributes)
    current_schema[attributes['Field']] = attributes['Type']
    end

    def add_value_to_table_row(attributes, body)
    type = current_schema[attributes['name']]
    @values << output_postgres_value(type, attributes, body)
    end

    def output_postgres_value(type, attributes, body)
    field = attributes['name']
    value = read_value_of(type, attributes, body)

    validate_value!(field, value, type)

    @@ -158,9 +242,8 @@ class SqlRocket
    raise NotImplementedError, "Type \"#{type}\" (of field \"#{field}\") is unhandled"
    end

    def read_value_of(node, type)
    return nil if node.attribute('xsi:nil') == "true"
    value = Nokogiri::XML.fragment(node.inner_xml).text
    def read_value_of(type, attributes, value)
    return nil if attributes['xsi:nil'] == "true"
    coerce_value(value, type)
    end

    @@ -169,22 +252,26 @@ class SqlRocket
    value
    end



    FIELDS_WHERE_NIL_IS_PROHIBITED = %w{created_at updated_at}

    def validate_value!(field, value, type)
    if FIELDS_WHERE_NIL_IS_PROHIBITED.member?(field) && value.nil?
    $stderr.puts "\e[31mERROR\e[0m NULL value is not allowed for #{@table_name}.#{field}"
    log_error "NULL value is not allowed for #{table_name}.#{field}"
    end
    end





    # Postgres SQL formatting statements

    def output_nil
    "NULL"
    end


    def output_postgres_boolean(value)
    output_quoted(value == '1' ? 't' : 'f')
    end

    def output_quoted(value)
    escaped = value
    .gsub(/'/, "''")
    @@ -193,15 +280,70 @@ class SqlRocket
    .gsub(/\\xE2\\x80\\x99/, "’")
    "'#{escaped}'"
    end

    def output_postgres_boolean(value)
    output_quoted(value == '1' ? 't' : 'f')





    # Logging

    def log_error(message)
    log_with_tags "\e[31mERROR \e[0m#{message}"
    end


    def log_warning(message)
    log_with_tags "\e[33mWARNING \e[0m#{message}"
    end

    def log_with_tags(message)
    $stderr.print message.chomp
    hash = {}
    hash["table"] = table_name if table_name
    hash["id"] = row_id if row_id
    hash["pos"] = io.pos
    hash["time"] = "%.2f" % (Time.now - @start)

    $stderr.print " {"
    $stderr.print hash.map { |key, value| "#{key}: #{value}" }.join(", ")
    $stderr.print "}\n"
    end

    def log_time_to_read_table
    end_ts = Time.now
    end_cpu = Process.times
    real_seconds = end_ts - @start_ts
    user_seconds = end_cpu.utime - @start_cpu.utime
    kernel_seconds = end_cpu.stime - @start_cpu.stime

    $stderr.print "\e[32m\e[1m#{@rows}\e[0m\e[32m inserts"
    $stderr.print " for \e[1m#{table_name}\e[0m"
    $stderr.print "\e[32m in \e[1m#{format_duration(real_seconds)}\e[0m"
    $stderr.print "\e[32m {user: %.2f, kernel: %.2f}\e[0m" % [user_seconds, kernel_seconds]
    $stderr.print "\n"
    end

    def format_duration(seconds)
    hours = (seconds / 3600).to_i
    minutes = (seconds / 60).round

    if hours > 0
    minutes -= (hours * 60)
    "#{hours} hours and #{minutes} minutes"
    elsif minutes > 0
    "#{minutes} minutes"
    else
    "%.2f seconds" % seconds
    end
    end

    end





    # CLI

    command, arg = ARGV

    case command
  2. @boblail boblail revised this gist Jul 30, 2013. 1 changed file with 99 additions and 31 deletions.
    130 changes: 99 additions & 31 deletions sql_rocket
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,7 @@
    #!/usr/bin/env ruby
    # encoding: utf-8
    # https://gist.github.com/boblail/5587579

    require 'benchmark'
    require 'nokogiri'
    require 'progressbar'
    @@ -8,28 +11,42 @@ require 'tempfile'
    class SqlRocket

    class << self



    # Exports a MySQL database, scheme and data, to an XML file.
    def dump(database_name, output_file=nil)
    $stderr.print "Dumping #{database_name}..."
    $stderr.print "Dumping #{database_name}...\n"

    command = "mysqldump --skip-triggers --ignore-table #{database_name}.schema_migrations --xml #{ARGV[2..-1].join(" ")} #{database_name}"
    command << " > #{output_file}" if output_file

    ms = Benchmark.realtime do
    output = `#{command}`
    puts output unless output_file
    $stderr.print "#{command}\n"

    # We don't use backticks so that we don't
    # pipe stdout into Ruby. This way the shell
    # can redirect it to a file without getting
    # this script involved!
    system command
    end
    $stderr.puts " \e[32mfinished in #{"%.2f seconds" % ms}\e[0m"
    end



    # Converts a MySQL XML dump to INSERT statements
    # properly formatted for Postgres.
    def convert(xml_file)
    xml = File.open(xml_file)
    puts SqlRocket.new(xml).convert
    file = File.open(xml_file)
    SqlRocket.new(file).convert($stdout)
    end



    # Chains a dump and a convert together.
    def rocket(database_name)
    xml_file = Tempfile.new(database_name)
    dump(database_name, xml_file.path)
    begin
    puts SqlRocket.new(xml_file.open).convert
    SqlRocket.new(xml_file.open).convert($stdout)
    ensure
    xml_file.close
    xml_file.unlink
    @@ -50,47 +67,63 @@ class SqlRocket

    attr_reader :io, :reader

    def convert
    def convert(output)
    progress = ProgressBar.new("Converting", io.size)

    reader.each do |node|
    figure_it_out(node)
    figure_it_out(node, output)
    progress.set io.pos
    end

    progress.finish

    @inserts.join("\n")

    @schema_by_table.each do |table, schema|
    next unless schema.key?("id")
    output.puts "SELECT setval('#{table}_id_seq', (SELECT MAX(id)+1 FROM #{table}), FALSE);"
    end

    true
    end

    def figure_it_out(node)
    def figure_it_out(node, output)
    if node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT
    case node.name
    when 'table_structure'
    @table_name = nil
    @mode = :continue

    when 'table_data'
    table_name = node.attribute('name')
    @inserts << "INSERT INTO \"#{table_name}\" VALUES #{@rows.join(",")};" if @rows.any?
    @table_name = node.attribute('name')
    output.print ");" if @rows > 0
    $stderr.puts "\e[1m\e[32m#{@rows}\e[0m\e[32m rows inserted to \e[1m\e[32m#{@table_name}\e[0m"

    @rows = 0
    @table_name = nil
    @mode = :continue

    when 'row'
    @rows << "(#{@values.join(",")})"
    @rows += 1
    if @rows == 1
    output.print "INSERT INTO \"#{@table_name}\" (#{@current_schema.keys.join(",")}) VALUES ("
    else
    output.print ","
    end
    output.print "(#{@values.join(",")})"
    end
    end

    if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    case node.name
    when 'table_structure'
    @mode = :read_structure
    table_name = node.attribute('name')
    @current_schema = @schema_by_table[table_name] = {}
    @table_name = node.attribute('name')
    @current_schema = @schema_by_table[@table_name] = {}

    when 'table_data'
    @mode = :read_data
    table_name = node.attribute('name')
    @current_schema = @schema_by_table[table_name] # readonly
    @rows = []
    @table_name = node.attribute('name')
    @current_schema = @schema_by_table[@table_name] # readonly
    @rows = 0

    when 'row'
    @values = []
    @@ -112,17 +145,52 @@ class SqlRocket
    end

    def output_postgres_value(node, type)
    return "NULL" if node.attribute('xsi:nil') == "true"
    value = Nokogiri::XML.fragment(node.inner_xml).text
    field = node.attribute('name')
    value = read_value_of(node, type)

    validate_value!(field, value, type)

    return output_nil if value.nil?
    return output_postgres_boolean(value) if type == "tinyint(1)"
    return value if type =~ /^int/
    return output_quoted(value) if type =~ /^(decimal|varchar|text|date|datetime|time)/
    return output_postgres_boolean(value) if type == "tinyint(1)"

    raise NotImplementedError, "Field type #{type} is unhandled"

    raise NotImplementedError, "Type \"#{type}\" (of field \"#{field}\") is unhandled"
    end

    def read_value_of(node, type)
    return nil if node.attribute('xsi:nil') == "true"
    value = Nokogiri::XML.fragment(node.inner_xml).text
    coerce_value(value, type)
    end

    def coerce_value(value, type)
    return nil if type == "datetime" && value == "0000-00-00 00:00:00"
    value
    end



    FIELDS_WHERE_NIL_IS_PROHIBITED = %w{created_at updated_at}

    def validate_value!(field, value, type)
    if FIELDS_WHERE_NIL_IS_PROHIBITED.member?(field) && value.nil?
    $stderr.puts "\e[31mERROR\e[0m NULL value is not allowed for #{@table_name}.#{field}"
    end
    end



    def output_nil
    "NULL"
    end

    def output_quoted(value)
    escaped = value.gsub(/'/, "''")
    escaped = value
    .gsub(/'/, "''")
    .gsub(/\r\n/, "\n")
    .gsub(/\\n\\\n/, "\\n")
    .gsub(/\\xE2\\x80\\x99/, "’")
    "'#{escaped}'"
    end

  3. @boblail boblail created this gist May 15, 2013.
    144 changes: 144 additions & 0 deletions sql_rocket
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,144 @@
    #!/usr/bin/env ruby
    require 'benchmark'
    require 'nokogiri'
    require 'progressbar'
    require 'tempfile'
    # require 'pry'

    class SqlRocket

    class << self

    def dump(database_name, output_file=nil)
    $stderr.print "Dumping #{database_name}..."
    command = "mysqldump --skip-triggers --ignore-table #{database_name}.schema_migrations --xml #{ARGV[2..-1].join(" ")} #{database_name}"
    command << " > #{output_file}" if output_file
    ms = Benchmark.realtime do
    output = `#{command}`
    puts output unless output_file
    end
    $stderr.puts " \e[32mfinished in #{"%.2f seconds" % ms}\e[0m"
    end

    def convert(xml_file)
    xml = File.open(xml_file)
    puts SqlRocket.new(xml).convert
    end

    def rocket(database_name)
    xml_file = Tempfile.new(database_name)
    dump(database_name, xml_file.path)
    begin
    puts SqlRocket.new(xml_file.open).convert
    ensure
    xml_file.close
    xml_file.unlink
    end
    end

    end



    def initialize(io)
    @io = io
    @reader = Nokogiri::XML::Reader(io)
    @schema_by_table = {}
    @inserts = []
    @mode = :continue
    end

    attr_reader :io, :reader

    def convert
    progress = ProgressBar.new("Converting", io.size)

    reader.each do |node|
    figure_it_out(node)
    progress.set io.pos
    end

    progress.finish

    @inserts.join("\n")
    end

    def figure_it_out(node)
    if node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT
    case node.name
    when 'table_structure'
    @mode = :continue

    when 'table_data'
    table_name = node.attribute('name')
    @inserts << "INSERT INTO \"#{table_name}\" VALUES #{@rows.join(",")};" if @rows.any?
    @mode = :continue

    when 'row'
    @rows << "(#{@values.join(",")})"
    end
    end

    if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    case node.name
    when 'table_structure'
    @mode = :read_structure
    table_name = node.attribute('name')
    @current_schema = @schema_by_table[table_name] = {}

    when 'table_data'
    @mode = :read_data
    table_name = node.attribute('name')
    @current_schema = @schema_by_table[table_name] # readonly
    @rows = []

    when 'row'
    @values = []

    when 'field'
    add_field_to_table_structure(node) if @mode == :read_structure
    add_value_to_table_row(node) if @mode == :read_data
    end
    end
    end

    def add_field_to_table_structure(node)
    @current_schema[node.attribute('Field')] = node.attribute('Type')
    end

    def add_value_to_table_row(node)
    type = @current_schema[node.attribute('name')]
    @values << output_postgres_value(node, type)
    end

    def output_postgres_value(node, type)
    return "NULL" if node.attribute('xsi:nil') == "true"
    value = Nokogiri::XML.fragment(node.inner_xml).text
    return value if type =~ /^int/
    return output_quoted(value) if type =~ /^(decimal|varchar|text|date|datetime|time)/
    return output_postgres_boolean(value) if type == "tinyint(1)"

    raise NotImplementedError, "Field type #{type} is unhandled"
    end

    def output_quoted(value)
    escaped = value.gsub(/'/, "''")
    "'#{escaped}'"
    end

    def output_postgres_boolean(value)
    output_quoted(value == '1' ? 't' : 'f')
    end

    end



    command, arg = ARGV

    case command
    when "dump"; SqlRocket.dump arg
    when "convert"; SqlRocket.convert arg
    when "rocket"; SqlRocket.rocket arg
    else puts "Uh... Don't know #{command}. Please say dump, convert or rocket!"
    end