Skip to content

Instantly share code, notes, and snippets.

@piorus
Created July 1, 2018 20:18
Show Gist options
  • Select an option

  • Save piorus/37bdbadab0ff17fb74b488d245bce67d to your computer and use it in GitHub Desktop.

Select an option

Save piorus/37bdbadab0ff17fb74b488d245bce67d to your computer and use it in GitHub Desktop.

Revisions

  1. piorus created this gist Jul 1, 2018.
    39 changes: 39 additions & 0 deletions pdf_merge.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,39 @@
    #!/usr/bin/env python
    import sys

    from os import listdir
    from os.path import isfile, join, dirname, realpath

    try:
    from PyPDF2 import PdfFileReader, PdfFileWriter
    except ImportError:
    from pyPdf import PdfFileReader, PdfFileWriter

    def get_files(ext):
        """Return the sorted names of files beside this script ending in *ext*.

        Only regular files located directly in the directory that contains
        this script are considered (no recursion into sub-directories).

        Args:
            ext: Suffix to match, e.g. ``'.pdf'``. The comparison is
                case-sensitive and is applied to the joined path.

        Returns:
            A list of bare file names (not full paths), sorted
            lexicographically. The names are relative to the script's
            directory, not to the current working directory.
        """
        path = dirname(realpath(__file__))
        matches = []
        for name in listdir(path):
            full_path = join(path, name)
            if full_path.endswith(ext) and isfile(full_path):
                matches.append(name)
        # os.listdir() returns entries in arbitrary order; sort so that the
        # result (and therefore the page order of a merged document) is
        # reproducible across runs and platforms.
        matches.sort()
        return matches

    def pdf_cat(input_files, output_stream):
        """Concatenate the pages of several PDF files into one output stream.

        Args:
            input_files: Iterable of paths of the PDF files to merge, in the
                order their pages should appear in the result.
            output_stream: Writable binary stream that receives the merged
                document. It is not closed by this function.
        """
        handles = []
        try:
            # Every input has to stay open until the writer has produced the
            # output: page data is only pulled from the source files during
            # the write operation. See
            # https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733
            for path in input_files:
                handle = open(path, 'rb')
                handles.append(handle)

            merged = PdfFileWriter()
            for source in (PdfFileReader(handle) for handle in handles):
                page_count = source.getNumPages()
                for index in range(page_count):
                    page = source.getPage(index)
                    merged.addPage(page)

            merged.write(output_stream)
        finally:
            # Release whatever was opened, even if opening or merging failed.
            for handle in handles:
                handle.close()

    if __name__ == '__main__':
        # Merge every PDF that sits next to this script into ./result.pdf.
        from os.path import normcase

        if sys.platform == "win32":
            import os, msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

        output_name = 'result.pdf'
        output_path = normcase(realpath(output_name))

        # get_files() returns bare names relative to the script's directory;
        # turn them into full paths so the script also works when it is
        # started from a different working directory.
        script_dir = dirname(realpath(__file__))
        sources = [join(script_dir, name) for name in sorted(get_files('.pdf'))]

        # A result.pdf left over from a previous run must not be used as an
        # input: opening it for writing below truncates it before it is read.
        sources = [
            source for source in sources
            if normcase(realpath(source)) != output_path
        ]

        # The context manager guarantees the merged file is flushed and closed.
        with open(output_name, 'w+b') as output_stream:
            pdf_cat(sources, output_stream)