"""
Joins multiple single-file invoices in the StormWare Pohoda XML format into
a single XML file.

https://www.stormware.cz/xml/schema/version_2/invoice.xsd
https://www.stormware.cz/schema/version_2/data.xsd

Usage:

$ python join_invoices.py 'single_invoices/*.xml' merged_invoice.xml

File encoding: windows-1250, CRLF.
"""

import argparse
import glob

from tqdm import tqdm
import xmltodict


def merge_invoices(input_path, output_path):
    """Merge every invoice file matching *input_path* into *output_path*.

    The first matched document provides the top-level ``dat:dataPack``
    envelope; the ``dat:dataPackItem`` of every matched document (including
    the first one) is appended to that envelope, which is then written out.

    Args:
        input_path: Glob pattern selecting the single-invoice XML files.
        output_path: Path of the merged XML file to write.

    Raises:
        FileNotFoundError: If the pattern matches no files. This is raised
            before the output is opened, so an existing output file is not
            truncated by a run that had nothing to merge.
    """
    # glob order depends on the file system; sort for reproducible output.
    paths = sorted(glob.glob(input_path))
    if not paths:
        raise FileNotFoundError(
            'No input files match pattern: %r' % (input_path,))

    merged_doc = None
    for path in tqdm(paths):
        doc = load_xml(path)
        # Take the item out first: turning the document into the envelope
        # below removes the item from it.
        item = extract_item_with_id(doc)
        # use the first document as the template for the top-level structure
        if merged_doc is None:
            merged_doc = _to_envelope(doc)
        merged_doc['dat:dataPack']['dat:dataPackItem'].append(item)
    save_xml(merged_doc, output_path)


def _to_envelope(doc):
    """Turn a parsed invoice document into an empty envelope, in place."""
    data_pack = doc['dat:dataPack']
    # Deleting and re-adding the key moves it to the end of the mapping and
    # replaces the single item with an empty list to be filled by the caller.
    del data_pack['dat:dataPackItem']
    # envelope id must not be empty, so let's leave there some dummy value
    data_pack['@id'] = "00001"
    data_pack['dat:dataPackItem'] = []
    return doc


def load_root_doc(path):
    """Load the file at *path* and return it as an empty envelope document.

    The returned document keeps the top-level ``dat:dataPack`` structure of
    the file, with a dummy ``@id`` and an empty ``dat:dataPackItem`` list.
    """
    return _to_envelope(load_xml(path))


def extract_item_with_id(doc):
    """Return the ``dat:dataPackItem`` of *doc*, tagged with the envelope id.

    The item's ``@id`` attribute is set (overwriting any existing value) to
    the ``@id`` of the enclosing ``dat:dataPack``. The item is modified in
    place and is the same object that is stored in *doc*.

    NOTE(review): this expects exactly one ``dat:dataPackItem`` per file; a
    file with several items would presumably be parsed to a list and fail
    here -- confirm whether such inputs can occur.
    """
    data_pack = doc['dat:dataPack']
    item = data_pack['dat:dataPackItem']
    item['@id'] = data_pack['@id']
    return item


def load_xml(path):
    """Parse the XML file at *path* with xmltodict and return the result."""
    # Opened in binary mode so the parser receives the raw bytes.
    with open(path, 'rb') as f:
        return xmltodict.parse(f)


def save_xml(doc, path):
    """Write *doc* to *path* as pretty-printed Windows-1250 XML with CRLF."""
    with open(path, 'wb') as f:
        xmltodict.unparse(doc, output=f, pretty=True,
                          encoding='Windows-1250', newl='\r\n')


def parse_args():
    """Parse the command line: an input glob pattern and an output path."""
    parser = argparse.ArgumentParser(
        description='Join multiple Pohoda invoices')
    parser.add_argument('input_path', metavar='INPUT_PATH')
    parser.add_argument('output_path', metavar='OUTPUT_PATH')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    merge_invoices(args.input_path, args.output_path)