Skip to content

Instantly share code, notes, and snippets.

@ebunt
Forked from justinvw/csv2xml.py
Created August 8, 2021 15:40
Show Gist options
  • Select an option

  • Save ebunt/2efdde4c566325c6da549568dff05cf8 to your computer and use it in GitHub Desktop.

Select an option

Save ebunt/2efdde4c566325c6da549568dff05cf8 to your computer and use it in GitHub Desktop.

Revisions

  1. @justinvw justinvw created this gist Apr 19, 2011.
    120 changes: 120 additions & 0 deletions csv2xml.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,120 @@
    #!/usr/bin/env python
    # encoding: utf-8
    """
    csv2xml.py
    Created by Justin van Wees on 2011-04-18.
    """
    import sys
    import os
    import string
    import re
    import csv
    import libxml2


    VERSION = '0.1 (2011-04-18)'

    class ConvertToXML(object):
        """Convert a CSV file into a simple XML document.

        The first CSV row supplies the element names (after normalisation by
        ``parse_headers``); every following row becomes one record element
        whose children are named after the headers.

        Instantiating the class performs the whole conversion: the CSV file
        is read, the XML is built and stored in ``self.xml`` and then either
        written to ``dest_file`` or printed to standard output.
        """

        def __init__(self, options, source_csv, dest_file=None):
            """Run the conversion.

            ``options`` must provide the attributes ``delimiter``,
            ``quotechar``, ``xml_root`` and ``xml_record``. ``source_csv`` is
            the path of the CSV file to read. When ``dest_file`` is given the
            XML is saved there, otherwise it is printed.
            """
            self.csv = self.parse_csv(filename=source_csv,
                                      delimiter=options.delimiter,
                                      quotechar=options.quotechar)

            # An empty file has no header row; indexing it would raise
            # IndexError, so fall back to an empty header list instead.
            self.headers = self.parse_headers(self.csv[0]) if self.csv else []

            self.xml = self.create_xml(root_element=options.xml_root,
                                       record_element=options.xml_record,
                                       headers=self.headers, csv=self.csv)

            if dest_file:
                self.save(dest_file, self.xml)
            else:
                # Parenthesised single-argument form behaves the same as the
                # print statement on Python 2 and is valid on Python 3.
                print(self.xml)

        def parse_csv(self, filename, delimiter, quotechar):
            """Read ``filename`` and return its rows as a list of lists.

            ``delimiter`` and ``quotechar`` are registered as the csv dialect
            named ``'custom'``, which is then used to parse the file.
            """
            csv.register_dialect('custom', delimiter=delimiter,
                                 quotechar=quotechar)

            # The dialect has to be handed to the reader explicitly; without
            # it the reader uses the default dialect and the requested
            # delimiter/quote character would be ignored.
            with open(filename, mode='r') as csv_file:
                return list(csv.reader(csv_file, dialect='custom'))

        def parse_headers(self, headers):
            """Turn raw header cells into element names.

            Each header has all ``string.punctuation`` characters removed, is
            stripped of surrounding whitespace, lower-cased, and has its
            remaining spaces replaced with underscores.
            """
            punct = set(string.punctuation)
            parsed_headers = []

            for head in headers:
                # Punctuation is removed before stripping, so whitespace that
                # surrounded a removed character stays inside the name.
                head = ''.join(ch for ch in head if ch not in punct)
                head = head.strip().lower()
                parsed_headers.append(head.replace(' ', '_'))

            return parsed_headers

        @staticmethod
        def _fields(headers, record):
            """Pair every header with its value in ``record``.

            Rows shorter than the header list are padded with empty strings;
            values beyond the last header are dropped.
            """
            padded = list(record) + [''] * (len(headers) - len(record))
            return zip(headers, padded)

        @staticmethod
        def _to_text(value):
            """Decode byte strings as UTF-8; return text values unchanged."""
            if isinstance(value, bytes):
                return value.decode('utf-8')
            return value

        def create_xml(self, root_element, record_element, headers, csv):
            """Build the XML document with libxml2 and return it serialised.

            ``csv`` is the full list of rows including the header row, which
            is skipped. Completely empty rows (as produced by blank lines)
            are skipped as well. The result is the UTF-8 encoded, indented
            serialisation of the document.
            """
            doc = libxml2.newDoc(version='1.0')
            try:
                root = doc.newChild(None, root_element, None)

                for record in csv[1:]:
                    if not record:
                        continue

                    this_record = root.newChild(None, record_element, None)

                    for header, value in self._fields(headers, record):
                        if value:
                            # newTextChild escapes XML special characters;
                            # newChild expects already-escaped content and
                            # mishandles raw '&' or '<' in cell values.
                            this_record.newTextChild(None, header, value)
                        else:
                            this_record.newChild(None, header, None)

                return doc.serialize(encoding='utf-8', format=1)
            finally:
                # libxml2 documents are not released automatically.
                doc.freeDoc()

        def create_xml2(self, root_element, record_element, headers, csv):
            """Build the same document as ``create_xml`` using xml.dom.minidom.

            Returns the pretty-printed document encoded as UTF-8. Header and
            cell values given as byte strings are decoded as UTF-8 first.
            Completely empty rows are skipped and short rows are padded with
            empty elements.
            """
            # Imported locally: the module does not import minidom at the top.
            from xml.dom.minidom import Document

            doc = Document()

            root = doc.createElement(root_element)
            doc.appendChild(root)

            for record in csv[1:]:
                if not record:
                    continue

                this_record = doc.createElement(record_element)

                for header, value in self._fields(headers, record):
                    this_item = doc.createElement(self._to_text(header))

                    if value:
                        this_item.appendChild(
                            doc.createTextNode(self._to_text(value)))

                    this_record.appendChild(this_item)

                root.appendChild(this_record)

            return doc.toprettyxml(encoding="UTF-8")

        def save(self, filename, xml):
            """Write ``xml`` to ``filename``, replacing any existing file."""
            # The context manager closes the file even if the write fails.
            with open(filename, 'w') as xml_file:
                xml_file.write(xml)

    if __name__ == '__main__':
        # Command-line entry point: csv2xml.py [options] SOURCE_CSV [DEST_XML]
        from optparse import OptionParser

        parser = OptionParser(version="%prog " + VERSION,
                              usage='%prog [options] SOURCE_CSV [DEST_XML]')
        parser.disable_interspersed_args()
        parser.add_option('-d', '--delimiter', dest='delimiter', type='str', default=',',
                          help="One-char string used to separate fields in the CSV file")
        parser.add_option('-q', '--quote-char', dest='quotechar', type='str',
                          default='"', help="One-char string used to quote fields that contain 'special' chars")
        parser.add_option('-r', '--root-element', dest="xml_root", type='str', default='root',
                          help="Name of the root element")
        parser.add_option('-i', '--record-element', dest="xml_record", type='str',
                          default='record', help="Name of the record elements")

        (options, args) = parser.parse_args()

        # Without a source file there is nothing to convert; report a usage
        # error instead of failing with an IndexError on args[0].
        if not args:
            parser.error('missing SOURCE_CSV argument')

        # The constructor writes DEST_XML when it is given and prints the XML
        # otherwise, so the instance itself must not be printed as well.
        dest_xml = args[1] if len(args) > 1 else None
        xml = ConvertToXML(options, args[0], dest_xml)