-
-
Save vigneshwaranr/3454093 to your computer and use it in GitHub Desktop.
| #! /bin/sh | |
| usage_error () { | |
| echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>' | |
| echo | |
| echo 'Example:' | |
| echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps' | |
| echo | |
| echo 'Tested on:' | |
| echo 'Python 2.7.3' | |
| echo 'SQLite 3.7.9' | |
| } | |
| if [ ! $# -eq 3 ] | |
| then | |
| usage_error | |
| exit 1 | |
| fi | |
| if [ ! -r $1 ] | |
| then | |
| echo $1' is not readable.' | |
| echo 'Please give the correct path to sqlite_to_postgres.py' | |
| exit 1 | |
| fi | |
| if [ ! -r $2 ] | |
| then | |
| echo $2' is not readable' | |
| exit 1 | |
| fi | |
| if [ ! -d $3 ] | |
| then | |
| echo $3' is not a valid directory' | |
| exit 1 | |
| fi | |
| #Get the list of tables | |
| echo .tables | sqlite3 $2 > $3/lsoftbls | |
| #Get dumps from sqlite | |
| for i in `cat $3/lsoftbls` | |
| do | |
| echo 'Generating sqlite dumps for '$i | |
| echo '.output '$3'/'$i'.dump' > $3/dumper | |
| echo 'pragma table_info('$i');' >> $3/dumper | |
| echo '.dump '$i >> $3/dumper | |
| echo '.quit' >> $3/dumper | |
| cat $3/dumper | sqlite3 $2 | |
| done | |
| #Use the python script to convert the sqlite dumps to psql dumps | |
| echo | |
| echo 'Now converting the sqlite dumps into psql format...' | |
| echo | |
| for i in `ls -1 $3/*.dump` | |
| do | |
| python $1 $i | |
| done | |
| #Remove the sqlite3 dumps and the file 'lsoftbls' | |
| echo | |
| echo 'Removing temporary files..' | |
| rm $3/*.dump | |
| rm $3/lsoftbls | |
| rm $3/dumper | |
| echo 'Removing empty dump files..' | |
| wc -l $3/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm | |
| echo ; echo 'Done.'; echo | |
| echo 'Please find the psql dumps at '$3 |
| 0|display_name|varchar(64)|1||0 | |
| 1|name|varchar(64)|1||0 | |
| 2|local_site_id|integer|0||0 | |
| 3|incoming_request_count|integer|0||0 | |
| 4|invite_only|bool|1||0 | |
| 5|id|integer|1||1 | |
| 6|mailing_list|varchar(75)|1||0 | |
| 7|visible|bool|1||0 | |
| PRAGMA foreign_keys=OFF; | |
| BEGIN TRANSACTION; | |
| CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL); | |
| INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1); | |
| INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1); | |
| INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1); | |
| INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1); | |
| INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1); | |
| COMMIT; |
| INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE); | |
| INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE); | |
| INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE); | |
| INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE); | |
| INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE); |
| #! /usr/bin/python | |
| # SQLite3 uses 1 and 0 whereas PostgreSQL uses TRUE and FALSE for booleans | |
| # This python script serves a single purpose of converting the sqlite dumps | |
| # into postres-compatible dumps by converting the boolean values. | |
| import random | |
| import sys | |
| import os.path | |
| BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:'+str(int(random.random()*(10**10))) | |
| COLUMNS = [] | |
| COLUMN_NAMES = "" | |
| COLUMN_TYPES = () | |
| def usage(): | |
| ''' | |
| Print usage and exit | |
| ''' | |
| print "Usage: ./bool_changer.py <filename.dump>" | |
| sys.exit() | |
| def fix_column_names(first_line): | |
| ''' | |
| The insert statement from sqlite3 dump is as follows: | |
| INSERT INTO "test" VALUES(1,'Hello'); | |
| We need to add the column information to the statements like this: | |
| INSERT INTO "test" (id,name) VALUES(1,'Wibble'); | |
| This is necessary because the column orders may be different in psql db. | |
| ''' | |
| global COLUMN_NAMES | |
| index = first_line.index(' VALUES') | |
| return first_line[:index] + COLUMN_NAMES + first_line[index:] | |
| def fix_bool(stmt): | |
| from_here = 'VALUES(' | |
| start_pos = stmt.index(from_here) + len(from_here) | |
| cur_pos = start_pos | |
| newstmt = stmt[:start_pos] # [INSERT ... VALUES(] | |
| stmtlen = len(stmt) | |
| no_of_cols = len(COLUMN_TYPES) | |
| for i in range(0,no_of_cols): | |
| if COLUMN_TYPES[i] == 'bool': | |
| newstmt += stmt[start_pos:cur_pos] #nothing happens if both are same | |
| if stmt[cur_pos] == '1': newstmt += 'TRUE' | |
| elif stmt[cur_pos] == '0': newstmt += 'FALSE' | |
| if i == no_of_cols-1: #i.e. last column | |
| newstmt += ');\n' | |
| break | |
| newstmt += ',' #not last column | |
| cur_pos += 2 | |
| start_pos = cur_pos | |
| else: | |
| if i == no_of_cols-1: #if it's the last non-bool column, then | |
| newstmt += stmt[start_pos:] #simply insert everything that's left | |
| break #and leave | |
| if stmt[cur_pos] != "'": | |
| for cur_pos in range(cur_pos+1,stmtlen): | |
| if stmt[cur_pos] == ',': | |
| cur_pos += 1 | |
| break #the inner loop and go to next column | |
| else: # the 'problematic' place. cur_pos in "'" | |
| cur_pos += 1 #what's next after "'"? | |
| while cur_pos < stmtlen: | |
| if stmt[cur_pos] == "'": | |
| if stmt[cur_pos+1] == "'": #ignore escaped quote ('') | |
| cur_pos += 2 | |
| continue #searching | |
| elif stmt[cur_pos+1] == ",": #end of string | |
| cur_pos += 2 | |
| break #to next column | |
| cur_pos += 1 | |
| return newstmt | |
| def get_psql_inserts(insert_lines): | |
| ''' | |
| This method will get a list of one or more lines that together constitute | |
| a single insert statement from the sqlite dump, manipulates it and | |
| returns the list containing the psql compatible insert statement. | |
| ''' | |
| global BOUNDARY | |
| #First fix the column name issue. | |
| insert_lines[0] = fix_column_names(insert_lines[0]) | |
| if 'bool' in COLUMN_TYPES: | |
| insert_stmt = BOUNDARY.join(insert_lines) | |
| insert_stmt = fix_bool(insert_stmt) | |
| insert_lines = insert_stmt.split(BOUNDARY) | |
| return insert_lines | |
| def process_dump(input_file,output_file): | |
| ''' | |
| Process the file lazily line by line | |
| ''' | |
| def process_insert(insert_lines): | |
| ''' | |
| Helper method to write psql commands into output_file | |
| ''' | |
| psql_inserts = get_psql_inserts(insert_lines) | |
| output_file.writelines(psql_inserts) | |
| global COLUMNS | |
| global COLUMN_NAMES | |
| global COLUMN_TYPES | |
| after_pragma = False #The first few lines will be schema info upto the | |
| #line that starts with "PRAGMA" | |
| insert_started = False | |
| insert_lines = [] | |
| insert_stmt_start = 'INSERT' | |
| for line in input_file: | |
| #Get the schema info from the head of the dump file | |
| if not after_pragma: | |
| if line[0].isdigit(): | |
| COLUMNS.append(tuple(line.split('|')[1:3])) | |
| elif line.startswith('PRAGMA'): | |
| after_pragma = True | |
| COLUMN_NAMES = str(tuple([name for name,datatype in COLUMNS])) | |
| COLUMN_TYPES = tuple([datatype for name,datatype in COLUMNS]) | |
| #Python uses single quotes for enclosing a string. | |
| #But psql uses double quotes on "column names" and | |
| #single quotes on strings inside VALUES(..) | |
| COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'",'"') | |
| continue | |
| #Ignore the lines from PRAGMA and before INSERT. | |
| if not insert_started: | |
| if line.startswith('CREATE TABLE'): | |
| table_name = line[line.index('"'):] | |
| table_name = table_name[:table_name.index('"',1)+1] # '"table_name"' | |
| insert_stmt_start = 'INSERT INTO ' + table_name | |
| elif line.startswith('INSERT'): | |
| insert_started = True | |
| else: continue | |
| #If the control reaches here, it must mean that the first insert statement | |
| #has appeared. But the insert statements may span multiple lines. So, we | |
| #collect those lines and process them. | |
| if line.startswith(insert_stmt_start): | |
| if insert_lines: #True from 2nd insert statement | |
| process_insert(insert_lines) #Insert the previous insert statement | |
| insert_lines = [line] #and append the current one | |
| elif insert_lines: | |
| insert_lines.append(line) | |
| if not insert_lines: return | |
| while insert_lines[-1].endswith(';\n') and \ | |
| (insert_lines[-1].startswith('CREATE INDEX') or \ | |
| insert_lines[-1].startswith('COMMIT')): | |
| insert_lines.pop() #remove the create index and commit lines at the end | |
| process_insert(insert_lines) #fix the last insert statement | |
| if __name__ == '__main__': | |
| if len(sys.argv) != 2: | |
| usage() | |
| filename = sys.argv[1] | |
| output_filename = filename + '.psql' | |
| if not os.path.isfile(filename): | |
| print "FATAL: Not a valid filename" | |
| usage() | |
| print sys.argv[0], ': Trying to convert', sys.argv[1] | |
| try: | |
| input_file = open(filename,'r') | |
| output_file = open(output_filename,'w') | |
| process_dump(input_file,output_file) | |
| finally: | |
| input_file.close() | |
| output_file.close() | |
| print sys.argv[0], ': Converted to', output_filename | |
Traceback (most recent call last):
File "sqlite_to_postgres.py", line 180, in <module>
process_dump(input_file,output_file)
File "sqlite_to_postgres.py", line 137, in process_dump
table_name = line[line.index('"'):]
ValueError: substring not found
Changed lines 136 and 137 to this:
table_name = line[line.index('['):]
table_name = table_name[:table_name.index(']',1)+1]
My sqlite3 commands were not quoting the table names, instead it uses brackets. This fixes that.
awesome. I found this actually also trying to convert a sqlite reviewboard database into postgres!
How did you know the right order to import all the separate dump files (one per table) without violating foreign key constraints?
FYI. Found a way to do import them in the right order.
- Output the schema separately from sqlite
- Manually edit schema and keep trying to import until the order of the tables imports correctly w/o foreign key reference errors
- A combination of grep/sed on that schema.sql to extract out the table names in order
- Some more shell scripting on those table names to concatenate your various dump files into the right order
In case this is helpful to someone else...
cat schema.sql | grep "CREATE TABLE" | sed -e "s/CREATE TABLE \"//" | sed -e "s/\".*//" > table-order.txt
echo "BEGIN TRANSACTION;" > whole-dump.sql
for t in `cat table-order.txt`; do
cat dumps/$t.dump.sql >> whole-dump.sql
done
echo "COMMIT;" >> whole-dump.sql
@matthauck I just disabled the constraints temporarily in a transaction. I concatenated all the sql commands into one file and wrapped the contents like the following..
begin;
SET CONSTRAINTS ALL DEFERRED;
<ALL DUMP COMMANDS>
commit;
Then imported this dump in one go! Make sure you test this plenty of times on backup data and setup.
Reference: http://www.openscope.net/2012/08/23/subverting-foreign-key-constraints-in-postgres-or-mysql/
awesome, thanks, this works great! Just one thing: You'll also probably need to reset all your sequences. For that, https://wiki.postgresql.org/wiki/Fixing_Sequences has a generic solution.
This was a life saver. Thanks a lot!
Just a note, I was using this to export a Grafana SQLite3 DB, ran into the same error as @smclenithan - Grafana uses backticks (`) instead of quotations marks for table names, so:
Change lines 136 and 137 to:
table_name = line[line.index('`'):]
table_name = table_name[:table_name.index('`',1)+1
And it works.
Cheers
@gl1tchh and @smclenithan offers own fixes for "substring not found" error but both is useless in my case.
My environment:
SQLite version 3.8.7.1
Debian GNU/Linux 8
My sqlite3 dump exports CREATE TABLE table_name without "quotations" or backticks or any other separator:
CREATE TABLE v_users (
...
INSERT INTO "v_users" VALUES(
...
So, I check how python works with string (I'm not a programmer) and make own fix for searching tablename in the CREATE TABLE string:
if line.startswith('CREATE TABLE'):
table_name = line[13:] #--modified original script line 136
table_name = table_name[:table_name.index(' ')] # '"table_name"' #--modified original script line 137
insert_stmt_start = 'INSERT INTO "' + table_name + '"' #--modified original script line 138
Explaining:
String part "CREATE TABLE" has 12 chars and with 1 white space we got a 13 characters. So our table_name located after 13 character (line 136) and before the next white space (line 137). But also we must add quotations in the line 138, because our table_name is not quoted, but 'INSERT INTO ' command must have quotations for table_name.
Thank you