|
|
@@ -6,13 +6,19 @@ require 'benchmark' |
|
|
require 'nokogiri' |
|
|
require 'progressbar' |
|
|
require 'tempfile' |
|
|
# require 'pry' |
|
|
|
|
|
class SqlRocket |
|
|
|
|
|
class SqlRocket < Nokogiri::XML::SAX::Document |
|
|
|
|
|
FIELDS_WHERE_NIL_IS_PROHIBITED = %w{created_at updated_at} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# SqlRocket command API |
|
|
|
|
|
class << self |
|
|
|
|
|
|
|
|
# Exports a MySQL database, scheme and data, to an XML file. |
|
|
def dump(database_name, output_file=nil) |
|
|
$stderr.print "Dumping #{database_name}...\n" |
|
|
@@ -52,28 +58,31 @@ class SqlRocket |
|
|
xml_file.unlink |
|
|
end |
|
|
end |
|
|
|
|
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# SqlRocket convert logic |
|
|
|
|
|
def initialize(io) |
|
|
@io = io |
|
|
@reader = Nokogiri::XML::Reader(io) |
|
|
@progress = ProgressBar.new("Converting", io.size) |
|
|
@parser = Nokogiri::XML::SAX::Parser.new(self) |
|
|
# @parser.replace_entities = true |
|
|
@schema_by_table = {} |
|
|
@inserts = [] |
|
|
@mode = :continue |
|
|
end |
|
|
|
|
|
attr_reader :io, :reader |
|
|
|
|
|
|
|
|
attr_reader :io, :parser, :progress, :output |
|
|
attr_reader :body, :table_name, :mode, :current_schema, :values, :row_id |
|
|
|
|
|
def convert(output) |
|
|
progress = ProgressBar.new("Converting", io.size) |
|
|
|
|
|
reader.each do |node| |
|
|
figure_it_out(node, output) |
|
|
progress.set io.pos |
|
|
end |
|
|
@start = Time.now |
|
|
@output = output |
|
|
parser.parse(io) |
|
|
|
|
|
progress.finish |
|
|
|
|
|
@@ -82,71 +91,146 @@ class SqlRocket |
|
|
output.puts "SELECT setval('#{table}_id_seq', (SELECT MAX(id)+1 FROM #{table}), FALSE);" |
|
|
end |
|
|
|
|
|
$stderr.puts "\e[32mFinished in \e[1m#{format_duration(Time.now - @start)}\e[0m" |
|
|
|
|
|
true |
|
|
|
|
|
rescue |
|
|
log_error $!.message |
|
|
puts "", body, "" |
|
|
raise |
|
|
end |
|
|
|
|
|
def figure_it_out(node, output) |
|
|
if node.node_type == Nokogiri::XML::Reader::TYPE_END_ELEMENT |
|
|
case node.name |
|
|
when 'table_structure' |
|
|
@table_name = nil |
|
|
@mode = :continue |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Mode |
|
|
|
|
|
def read_structure? |
|
|
mode == :read_structure |
|
|
end |
|
|
|
|
|
def read_data? |
|
|
mode == :read_data |
|
|
end |
|
|
|
|
|
def reset_mode! |
|
|
@rows = 0 |
|
|
@row_id = nil |
|
|
@table_name = nil |
|
|
@mode = :continue |
|
|
end |
|
|
|
|
|
def read_structure! |
|
|
@mode = :read_structure |
|
|
end |
|
|
|
|
|
def read_data! |
|
|
@mode = :read_data |
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# SAX events |
|
|
|
|
|
def start_document |
|
|
reset_mode! |
|
|
end |
|
|
|
|
|
def start_element(name, attributes={}) |
|
|
attributes = Hash[attributes] unless Hash === attributes |
|
|
@body = nil # we don't care about the body of this element |
|
|
|
|
|
case name |
|
|
when 'table_structure' |
|
|
read_structure! |
|
|
@table_name = attributes['name'] |
|
|
@current_schema = @schema_by_table[table_name] = {} |
|
|
|
|
|
when 'table_data' |
|
|
@table_name = node.attribute('name') |
|
|
output.print ");" if @rows > 0 |
|
|
$stderr.puts "\e[1m\e[32m#{@rows}\e[0m\e[32m rows inserted to \e[1m\e[32m#{@table_name}\e[0m" |
|
|
|
|
|
@rows = 0 |
|
|
@table_name = nil |
|
|
@mode = :continue |
|
|
when 'table_data' |
|
|
read_data! |
|
|
@start_ts = Time.now |
|
|
@start_cpu = Process.times |
|
|
@table_name = attributes['name'] |
|
|
@current_schema = @schema_by_table[table_name] # readonly |
|
|
@rows = 0 |
|
|
|
|
|
when 'row' |
|
|
@rows += 1 |
|
|
if @rows == 1 |
|
|
output.print "INSERT INTO \"#{@table_name}\" (#{@current_schema.keys.join(",")}) VALUES (" |
|
|
else |
|
|
output.print "," |
|
|
end |
|
|
output.print "(#{@values.join(",")})" |
|
|
when 'row' |
|
|
@row_id = nil |
|
|
@values = [] |
|
|
|
|
|
when 'field' |
|
|
add_field_to_table_structure(attributes) if read_structure? |
|
|
if read_data? |
|
|
@body = "" # we _do_ care about the body of this element |
|
|
@attributes = attributes # save for use in end_element |
|
|
end |
|
|
end |
|
|
|
|
|
if node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT |
|
|
case node.name |
|
|
when 'table_structure' |
|
|
@mode = :read_structure |
|
|
@table_name = node.attribute('name') |
|
|
@current_schema = @schema_by_table[@table_name] = {} |
|
|
end |
|
|
|
|
|
def end_element(name) |
|
|
progress.set io.pos |
|
|
|
|
|
case name |
|
|
when 'table_structure' |
|
|
reset_mode! |
|
|
|
|
|
when 'table_data' |
|
|
@mode = :read_data |
|
|
@table_name = node.attribute('name') |
|
|
@current_schema = @schema_by_table[@table_name] # readonly |
|
|
@rows = 0 |
|
|
|
|
|
when 'row' |
|
|
@values = [] |
|
|
|
|
|
when 'field' |
|
|
add_field_to_table_structure(node) if @mode == :read_structure |
|
|
add_value_to_table_row(node) if @mode == :read_data |
|
|
when 'table_data' |
|
|
output.print ");" if @rows > 0 |
|
|
log_time_to_read_table |
|
|
reset_mode! |
|
|
|
|
|
when 'row' |
|
|
@rows += 1 |
|
|
if @rows == 1 |
|
|
output.print "INSERT INTO \"#{table_name}\" (#{current_schema.keys.join(",")}) VALUES (" |
|
|
else |
|
|
output.print "," |
|
|
end |
|
|
output.print "(#{values.join(",")})" |
|
|
|
|
|
when 'field' |
|
|
if read_data? |
|
|
@row_id = body.chomp if @attributes['name'] == 'id' |
|
|
add_value_to_table_row(@attributes, @body) |
|
|
end |
|
|
|
|
|
end |
|
|
end |
|
|
|
|
|
def add_field_to_table_structure(node) |
|
|
@current_schema[node.attribute('Field')] = node.attribute('Type') |
|
|
|
|
|
def error(string) |
|
|
log_error string |
|
|
end |
|
|
|
|
|
def add_value_to_table_row(node) |
|
|
type = @current_schema[node.attribute('name')] |
|
|
@values << output_postgres_value(node, type) |
|
|
|
|
|
def warning(string) |
|
|
log_warning string |
|
|
end |
|
|
|
|
|
def output_postgres_value(node, type) |
|
|
field = node.attribute('name') |
|
|
value = read_value_of(node, type) |
|
|
|
|
|
def characters(string) |
|
|
@body << string unless @body.nil? |
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Element transformation |
|
|
|
|
|
def add_field_to_table_structure(attributes) |
|
|
current_schema[attributes['Field']] = attributes['Type'] |
|
|
end |
|
|
|
|
|
def add_value_to_table_row(attributes, body) |
|
|
type = current_schema[attributes['name']] |
|
|
@values << output_postgres_value(type, attributes, body) |
|
|
end |
|
|
|
|
|
def output_postgres_value(type, attributes, body) |
|
|
field = attributes['name'] |
|
|
value = read_value_of(type, attributes, body) |
|
|
|
|
|
validate_value!(field, value, type) |
|
|
|
|
|
@@ -158,9 +242,8 @@ class SqlRocket |
|
|
raise NotImplementedError, "Type \"#{type}\" (of field \"#{field}\") is unhandled" |
|
|
end |
|
|
|
|
|
def read_value_of(node, type) |
|
|
return nil if node.attribute('xsi:nil') == "true" |
|
|
value = Nokogiri::XML.fragment(node.inner_xml).text |
|
|
def read_value_of(type, attributes, value) |
|
|
return nil if attributes['xsi:nil'] == "true" |
|
|
coerce_value(value, type) |
|
|
end |
|
|
|
|
|
@@ -169,22 +252,26 @@ class SqlRocket |
|
|
value |
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
FIELDS_WHERE_NIL_IS_PROHIBITED = %w{created_at updated_at} |
|
|
|
|
|
def validate_value!(field, value, type) |
|
|
if FIELDS_WHERE_NIL_IS_PROHIBITED.member?(field) && value.nil? |
|
|
$stderr.puts "\e[31mERROR\e[0m NULL value is not allowed for #{@table_name}.#{field}" |
|
|
log_error "NULL value is not allowed for #{table_name}.#{field}" |
|
|
end |
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Postgres SQL formatting statements |
|
|
|
|
|
def output_nil |
|
|
"NULL" |
|
|
end |
|
|
|
|
|
|
|
|
def output_postgres_boolean(value) |
|
|
output_quoted(value == '1' ? 't' : 'f') |
|
|
end |
|
|
|
|
|
def output_quoted(value) |
|
|
escaped = value |
|
|
.gsub(/'/, "''") |
|
|
@@ -193,15 +280,70 @@ class SqlRocket |
|
|
.gsub(/\\xE2\\x80\\x99/, "’") |
|
|
"'#{escaped}'" |
|
|
end |
|
|
|
|
|
def output_postgres_boolean(value) |
|
|
output_quoted(value == '1' ? 't' : 'f') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Logging |
|
|
|
|
|
def log_error(message) |
|
|
log_with_tags "\e[31mERROR \e[0m#{message}" |
|
|
end |
|
|
|
|
|
|
|
|
def log_warning(message) |
|
|
log_with_tags "\e[33mWARNING \e[0m#{message}" |
|
|
end |
|
|
|
|
|
def log_with_tags(message) |
|
|
$stderr.print message.chomp |
|
|
hash = {} |
|
|
hash["table"] = table_name if table_name |
|
|
hash["id"] = row_id if row_id |
|
|
hash["pos"] = io.pos |
|
|
hash["time"] = "%.2f" % (Time.now - @start) |
|
|
|
|
|
$stderr.print " {" |
|
|
$stderr.print hash.map { |key, value| "#{key}: #{value}" }.join(", ") |
|
|
$stderr.print "}\n" |
|
|
end |
|
|
|
|
|
def log_time_to_read_table |
|
|
end_ts = Time.now |
|
|
end_cpu = Process.times |
|
|
real_seconds = end_ts - @start_ts |
|
|
user_seconds = end_cpu.utime - @start_cpu.utime |
|
|
kernel_seconds = end_cpu.stime - @start_cpu.stime |
|
|
|
|
|
$stderr.print "\e[32m\e[1m#{@rows}\e[0m\e[32m inserts" |
|
|
$stderr.print " for \e[1m#{table_name}\e[0m" |
|
|
$stderr.print "\e[32m in \e[1m#{format_duration(real_seconds)}\e[0m" |
|
|
$stderr.print "\e[32m {user: %.2f, kernel: %.2f}\e[0m" % [user_seconds, kernel_seconds] |
|
|
$stderr.print "\n" |
|
|
end |
|
|
|
|
|
def format_duration(seconds) |
|
|
hours = (seconds / 3600).to_i |
|
|
minutes = (seconds / 60).round |
|
|
|
|
|
if hours > 0 |
|
|
minutes -= (hours * 60) |
|
|
"#{hours} hours and #{minutes} minutes" |
|
|
elsif minutes > 0 |
|
|
"#{minutes} minutes" |
|
|
else |
|
|
"%.2f seconds" % seconds |
|
|
end |
|
|
end |
|
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# CLI |
|
|
|
|
|
command, arg = ARGV |
|
|
|
|
|
case command |
|
|
|