Skip to content

Instantly share code, notes, and snippets.

@quickbreach
Last active July 17, 2021 22:40
Show Gist options
  • Select an option

  • Save quickbreach/5456d162db9aae4042c7313e7ad02dc6 to your computer and use it in GitHub Desktop.

Select an option

Save quickbreach/5456d162db9aae4042c7313e7ad02dc6 to your computer and use it in GitHub Desktop.

Revisions

  1. quickbreach revised this gist Jul 15, 2020. 1 changed file with 7 additions and 6 deletions.
    13 changes: 7 additions & 6 deletions mysqlToJson.py
    Original file line number Diff line number Diff line change
    @@ -6,12 +6,12 @@
    from requests.auth import HTTPBasicAuth
    import hashlib
    '''
    Usage: 'python3 mysqlToJson.py <targetFile.sql>
    About:
    Parses an '.sql' file from mysqldump and converts it to JSON for elastic or other document DBS.
    ---
    The mysqlFileToArray function parses the sql file into into python objects & arrays,
    which can be converted into JSON with json.dumps(arrayOfObjects)
    The mysqlFileToArray function parses the sql file into into python objects & arrays, which can be converted into JSON with json.dumps(arrayOfObjects)
    Each resulting object represents a row from a table in the original SQL.
    @@ -130,7 +130,8 @@ def sample_postToElastic(document):
    # Prints out the converted document
    print(json.dumps(document))

    # Add the document to your db
    # sample_postToElastic(document) # Modify sample_postToElastic for your elastic stack, or use it as guide for other NoSQL document DBs
    # Add the document to your db
    # sample_postToElastic(document)
    # Modify sample_postToElastic for your elastic stack, or use it as guide for other NoSQL document DBs


  2. quickbreach revised this gist Jul 15, 2020. No changes.
  3. quickbreach created this gist Jul 15, 2020.
    136 changes: 136 additions & 0 deletions mysqlToJson.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,136 @@
    import re
    import csv
    import json
    import sys
    import requests
    from requests.auth import HTTPBasicAuth
    import hashlib
    '''
    Parses an '.sql' file from mysqldump and converts it to JSON for elastic or other document DBS.
    ---
    The mysqlFileToArray function parses the sql file into into python objects & arrays,
    which can be converted into JSON with json.dumps(arrayOfObjects)
    Each resulting object represents a row from a table in the original SQL.
    mysqlFileToArray adds the fields 'sqltojson_table_name' and 'sqltojson_source_file' to each document. keep or delete them as you wish, ie.
    del(document['sqltojson_table_name'])
    del(document['sqltojson_source_file'])
    '''

    # The brains of the operation
    def mysqlFileToArray(sqlFile):
    tac = dict()
    tmpDb = []
    currentTable = ''
    inTableStructure = False

    #
    for line in open(sqlFile, "r"):

    #
    if re.search('^CREATE TABLE `\S.+`', line) != None:
    inTableStructure = True
    currentTable = re.search('^CREATE TABLE `\S.+`', line).group(0).replace('CREATE TABLE ', '').replace('`', '')
    tac[currentTable] = []
    continue

    #
    if inTableStructure:
    #
    if line.find('PRIMARY KEY') > -1 or line.find(' KEY `') > -1:
    continue

    # Create a map of each table name and it's associated columns. Each column name is preceeded by two spaces
    if re.search('^\ \ `\S.+`', line) != None:
    columnName = re.search('^\ \ `\S.+`', line).group(0).strip().replace('`', '')
    tac[currentTable].append(columnName)

    # This tells us that we are no longer in table description
    if re.search('^\)', line) != None:
    inTableStructure = False
    continue

    #
    if re.search('^INSERT INTO `' + currentTable + '` VALUES', line) != None:
    line = line[line.find("("):]
    entriesArray = line.split("),(")

    # Remove the semicolon at the end of the last entry
    if (entriesArray[-1][-1] == ';'):
    entriesArray[-1] = entriesArray[-1][:-1]

    # iterate through the table rows
    for entry in entriesArray:
    entry = entry[1:] # Remove the leading (
    try:
    tmp = dict()
    # Carve apart the row with the CSV parser
    cvp = list(csv.reader([entry], quotechar="'", delimiter=',', escapechar='\\'))[0]

    # Create
    for x in range(0, len(cvp)):
    columnName = tac[currentTable][x]
    tmp[columnName] = cvp[x]

    tmp['sqltojson_table_name'] = currentTable.replace(");", "")
    tmp['sqltojson_source_file'] = sqlFile

    tmpDb.append(tmp)
    del(tmp)

    except Exception as e:
    print("Exception in line-49 loop: " + str(e))


    # Now take it all and convert it to JSON
    return tmpDb

    # Takes in the array from sqlFileToArray, then converts each document in it to JSON and posts it to elastic
    def sample_postToElastic(document):
    indexName = ""
    headers = {
    'Content-Type': 'application/json'
    }
    # Choose your own adventure: you pick what index the data goes into. I'm recreating the speration of tables here
    indexName = document['sqltojson_table_name']

    # Helps avoid duplicate entries, especially if you run the job multiple times
    s = hashlib.sha1()
    s.update(json.dumps(document).encode('utf-8'))
    docId = s.hexdigest()

    # Posts the document to an index with the same name as the original table
    resp = requests.post('http://127.0.0.1:9200/' + indexName + '/_doc/' + str(docId), auth = HTTPBasicAuth('username', 'password'), headers = headers, data = json.dumps(document))
    if(resp.status_code != 200 and resp.status_code != 201):
    # Something went wrong
    print("Elastic returned non-successful response")
    print(resp.__dict__)
    exit(-1)


    if len(sys.argv) < 2:
    print("Usage: python3 mysqlToJson.py <targetFile.sql>")
    exit(0)

    # Convert the sql file to an array of JSON documents. Each will have an added 'sqltojson_table_name' field to identify which "table" it belongs to
    convertedDb = mysqlFileToArray(sys.argv[1])

    # Add each document to elastic. modify the sample_postToElastic to fit your needs
    for document in convertedDb:
    '''
    # Modify the object as you wish here, ie add an '_id' field
    s = hashlib.sha1()
    s.update(json.dumps(document).encode('utf-8'))
    document['_id'] = s.hexdigest()
    '''

    # Prints out the converted document
    print(json.dumps(document))

    # Add the document to your db
    # sample_postToElastic(document) # Modify sample_postToElastic for your elastic stack, or use it as guide for other NoSQL document DBs