@@ -0,0 +1,120 @@
#!/usr/bin/env python
# encoding: utf-8
"""
csv2xml.py
Created by Justin van Wees on 2011-04-18.
"""
import sys
import os
import string
import re
import csv
import libxml2
# Version string appended to the program name for the parser's --version output.
VERSION = '0.1 (2011-04-18)'
class ConvertToXML(object):
    """Convert a CSV file into an XML document.

    The first CSV row supplies the column headers, which are normalised into
    element names. Every following row becomes one record element whose
    children are named after those headers. All of the work is done by the
    constructor.
    """

    def __init__(self, options, source_csv, dest_file=None):
        """Parse ``source_csv``, build the XML, then save or print it.

        options    -- object exposing ``delimiter``, ``quotechar``,
                      ``xml_root`` and ``xml_record`` attributes.
        source_csv -- path of the CSV file to read.
        dest_file  -- path the XML is written to; when omitted (or empty)
                      the XML is printed to standard output instead.
        """
        self.csv = self.parse_csv(filename=source_csv,
                                  delimiter=options.delimiter,
                                  quotechar=options.quotechar)
        # An empty CSV file has no header row; treat that as "no columns"
        # instead of failing with an IndexError.
        header_row = self.csv[0] if self.csv else []
        self.headers = self.parse_headers(header_row)
        self.xml = self.create_xml(root_element=options.xml_root,
                                   record_element=options.xml_record,
                                   headers=self.headers, csv=self.csv)
        if dest_file:
            self.save(dest_file, self.xml)
        else:
            # Call form prints the same text as the Python 2 print statement.
            print(self.xml)

    def parse_csv(self, filename, delimiter, quotechar):
        """Read ``filename`` and return its rows as a list of lists.

        ``delimiter`` and ``quotechar`` are registered as the 'custom' csv
        dialect, and that dialect is then used for reading.
        """
        csv.register_dialect('custom', delimiter=delimiter,
                             quotechar=quotechar)
        # The dialect has to be handed to the reader explicitly; without it
        # csv.reader falls back to its default dialect and the requested
        # delimiter/quotechar are ignored.
        with open(filename, mode='r') as csv_file:
            return list(csv.reader(csv_file, dialect='custom'))

    def parse_headers(self, headers):
        """Turn raw header cells into lowercase, underscore-separated names.

        Every character in ``string.punctuation`` is removed, surrounding
        whitespace is stripped, the text is lowercased and remaining spaces
        are replaced by underscores. Returns a new list in the same order.
        """
        punct = set(string.punctuation)
        parsed_headers = []
        for head in headers:
            cleaned = ''.join(ch for ch in head if ch not in punct)
            parsed_headers.append(cleaned.strip().lower().replace(' ', '_'))
        return parsed_headers

    @staticmethod
    def _field(record, index):
        """Return ``record[index]``, or '' when the row has too few fields."""
        return record[index] if index < len(record) else ''

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings to text; return other values unchanged."""
        return value.decode('utf-8') if isinstance(value, bytes) else value

    def create_xml(self, root_element, record_element, headers, csv):
        """Build the XML document with libxml2 and return it serialized.

        root_element   -- name of the document's root element.
        record_element -- name of the element created for each data row.
        headers        -- element names for the fields of a row, by column.
        csv            -- all parsed rows; the first (header) row is skipped.

        Rows with fewer fields than ``headers`` get empty elements for the
        missing columns. The result is serialized as UTF-8 with formatting
        enabled.
        """
        doc = libxml2.newDoc(version='1.0')
        try:
            root = doc.newChild(None, root_element, None)
            for record in csv[1:]:
                this_record = root.newChild(None, record_element, None)
                for index, header in enumerate(headers):
                    value = self._field(record, index)
                    if value:
                        # newTextChild escapes XML special characters;
                        # newChild expects content that is already escaped,
                        # so raw CSV text containing '&' or '<' would be
                        # mishandled.
                        this_record.newTextChild(None, header, value)
                    else:
                        this_record.newChild(None, header, None)
            return doc.serialize(encoding='utf-8', format=1)
        finally:
            # libxml2 documents are not garbage collected by the bindings.
            doc.freeDoc()

    def create_xml2(self, root_element, record_element, headers, csv):
        """Build the same XML structure with xml.dom.minidom and print it.

        Alternative to ``create_xml``; the pretty-printed document is written
        to standard output and nothing is returned.
        """
        # Imported here because the module itself never imports Document.
        from xml.dom.minidom import Document

        doc = Document()
        root = doc.createElement(root_element)
        doc.appendChild(root)
        for record in csv[1:]:
            this_record = doc.createElement(record_element)
            for index, header in enumerate(headers):
                this_item = doc.createElement(self._to_text(header))
                value = self._field(record, index)
                if value:
                    this_item.appendChild(
                        doc.createTextNode(self._to_text(value)))
                this_record.appendChild(this_item)
            root.appendChild(this_record)
        print(doc.toprettyxml(encoding="UTF-8"))

    def save(self, filename, xml):
        """Write the serialized ``xml`` to ``filename``, replacing any file."""
        # The with-block closes the file even if the write fails.
        with open(filename, 'w') as xml_file:
            xml_file.write(xml)
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run the conversion.
    from optparse import OptionParser

    # DEST_XML is optional: without it the XML goes to standard output.
    parser = OptionParser(version="%prog " + VERSION,
                          usage='%prog [options] SOURCE_CSV [DEST_XML]')
    parser.disable_interspersed_args()
    parser.add_option('-d', '--delimiter', dest='delimiter', type='str',
                      default=',',
                      help="One-char string used to separate fields in the CSV file")
    parser.add_option('-q', '--quote-char', dest='quotechar', type='str',
                      default='"',
                      help="One-char string used to quote fields that contain 'special' chars")
    parser.add_option('-r', '--root-element', dest="xml_root", type='str',
                      default='root', help="Name of the root element")
    parser.add_option('-i', '--record-element', dest="xml_record", type='str',
                      default='record', help="Name of the record elements")
    (options, args) = parser.parse_args()

    # Report usage problems through the parser instead of a traceback.
    if not args:
        parser.error("missing SOURCE_CSV argument")
    for flag, value in (('--delimiter', options.delimiter),
                        ('--quote-char', options.quotechar)):
        if len(value) != 1:
            parser.error("%s must be a single character" % flag)

    # The constructor saves or prints the XML itself, so its return value is
    # not printed here; printing it would only add the object's repr.
    dest_xml = args[1] if len(args) > 1 else None
    ConvertToXML(options, args[0], dest_xml)