Skip to content

Instantly share code, notes, and snippets.

@silverwolfx10
Forked from nicwolff/XML_breaker.py
Last active August 2, 2019 00:37
Show Gist options
  • Save silverwolfx10/45ccc6550d82ac24d2d0dc7ac6647eac to your computer and use it in GitHub Desktop.
Save silverwolfx10/45ccc6550d82ac24d2d0dc7ac6647eac to your computer and use it in GitHub Desktop.
Python script to split a large XML file into several smaller files
import os
import sys
from xml.sax import parse
from xml.sax.saxutils import XMLGenerator
class CycleFile(object):
    """Write-only file-like object that spreads output over numbered files.

    For a ``filename`` of ``data.xml`` the output goes to ``data1.xml``;
    every call to :meth:`cycle` closes the current file and continues in
    ``data2.xml``, ``data3.xml`` and so on.  The first file is opened by the
    constructor.

    Args:
        filename: Path whose stem and extension are used to build the
            numbered output names.
        encoding: Encoding applied to text passed to :meth:`write` /
            :meth:`writelines`.  Byte strings are written unchanged.
    """

    def __init__(self, filename, encoding='utf-8'):
        self.basename, self.ext = os.path.splitext(filename)
        self.encoding = encoding
        self.index = 0
        self.open_next_file()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _encoded(self, data):
        """Return ``data`` as bytes, encoding it first if it is text."""
        if isinstance(data, (bytes, bytearray)):
            return data
        return data.encode(self.encoding)

    def open_next_file(self):
        """Advance the counter and open the next numbered output file."""
        self.index += 1
        filename = '%s%s%s' % (self.basename, self.index, self.ext)
        # Binary mode: on Python 3, xml.sax.saxutils.XMLGenerator encodes its
        # output itself and passes bytes to writers that are not io text
        # streams, which a text-mode file would reject.  Writing the bytes
        # verbatim also keeps them consistent with the XML declaration.
        self.file = open(filename, 'wb')

    def cycle(self):
        """Close the current output file and continue in the next one."""
        self.file.close()
        self.open_next_file()

    def tell(self):
        """Return the current position within the current output file."""
        return self.file.tell()

    def write(self, str):
        """Write text or bytes to the current file; return the file's result."""
        return self.file.write(self._encoded(str))

    def writelines(self, sequence):
        """Write every item of ``sequence`` (text or bytes) to the current file."""
        self.file.writelines(self._encoded(item) for item in sequence)

    def flush(self):
        """Flush the current output file."""
        self.file.flush()

    def close(self):
        """Close the current output file."""
        self.file.close()
class XMLBreaker(XMLGenerator):
    """SAX content handler that re-serialises a document in several chunks.

    Element events are echoed through ``XMLGenerator`` to ``out``.  Once
    ``break_after`` elements named ``break_into`` have been closed, the next
    element that starts begins a new chunk: the still-open ancestors are
    closed in the current output, ``out.cycle()`` is called, and the XML
    declaration plus the same ancestors (with their original attributes)
    are written again before the new element.

    The split is deferred until another element actually starts so that no
    chunk consisting only of empty ancestor wrappers is produced when the
    input ends right after a full batch.

    Args:
        break_into: Name of the element to count.  ``None`` (the default)
            never equals an element name, so no split happens.
        break_after: Number of closed ``break_into`` elements per chunk.
        out: Output object handed to ``XMLGenerator``; it must additionally
            provide ``cycle()``, which switches to the next output target.
        *args, **kwargs: Forwarded unchanged to ``XMLGenerator.__init__``.
    """

    def __init__(self, break_into=None, break_after=1000, out=None,
                 *args, **kwargs):
        XMLGenerator.__init__(self, out, *args, **kwargs)
        self.out_file = out
        self.break_into = break_into
        self.break_after = break_after
        # Stack of (name, attrs) for the elements that are currently open.
        self.context = []
        # Number of break_into elements closed since the last batch filled up.
        self.count = 0
        # True once a batch is full; acted upon by the next startElement.
        self._split_pending = False

    def _start_next_chunk(self):
        """Close open ancestors, cycle the output and re-open the ancestors."""
        for open_name, _ in reversed(self.context):
            # Send the newline through the generator so it takes the same
            # encoding path as the markup instead of bypassing it.
            self.ignorableWhitespace("\n")
            XMLGenerator.endElement(self, open_name)
        self.out_file.cycle()
        XMLGenerator.startDocument(self)
        for open_name, open_attrs in self.context:
            XMLGenerator.startElement(self, open_name, open_attrs)

    def startElement(self, name, attrs):
        """Write the start tag, first switching chunks if a batch is full."""
        if self._split_pending:
            self._split_pending = False
            self._start_next_chunk()
        XMLGenerator.startElement(self, name, attrs)
        self.context.append((name, attrs))

    def endElement(self, name):
        """Write the end tag and note when a batch of elements is complete."""
        XMLGenerator.endElement(self, name)
        self.context.pop()
        if name != self.break_into:
            return
        self.count += 1
        if self.count == self.break_after:
            self.count = 0
            self._split_pending = True
def main(argv=None):
    """Split an XML file from the command line.

    Expects three arguments: the input file, the name of the element to
    count, and how many of those elements each output file should hold.
    Output is written through ``CycleFile(filename)``.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 3:
        sys.exit('usage: %s FILE ELEMENT COUNT' % os.path.basename(sys.argv[0]))
    filename, break_into, break_after = args
    try:
        break_after = int(break_after)
    except ValueError:
        sys.exit('COUNT must be an integer, got %r' % break_after)

    out = CycleFile(filename)
    try:
        parse(filename, XMLBreaker(break_into, break_after, out=out))
    finally:
        # Close the last output file even if parsing fails part-way.
        out.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment