Created
September 28, 2015 19:52
-
-
Save raphiz/3cd332d980d6f4e4cb9c to your computer and use it in GitHub Desktop.
Revisions
-
raphiz created this gist
Sep 28, 2015 .There are no files selected for viewing
# Remove a text watermark from every page of a PDF using PyPDF2.
#
# Reads ``input.pdf`` from the current directory, replaces each ``TJ``
# (show text array) operation whose first element is a text string starting
# with ``wm_text``, and writes the result to ``output.pdf``.
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import ArrayObject, NameObject, TextStringObject
from PyPDF2.utils import b_

# Prefix identifying the watermark text, and the text that replaces it.
wm_text = 'Persönliches Exemplar von'
replace_with = ''

# Content-stream operator to inspect; computed once instead of per operation.
# You might adapt this depending on how your PDF draws the watermark.
show_text_array = b_("TJ")

# Keep the input file open until the output has been written: the reader is
# handed the open stream and the pages added to the writer still refer to it.
with open('input.pdf', "rb") as input_stream:
    source = PdfFileReader(input_stream)
    output = PdfFileWriter()

    for page_number in range(source.getNumPages()):
        page = source.getPage(page_number)

        # Pages without a content stream have nothing to rewrite; they are
        # copied to the output unchanged instead of raising KeyError.
        if "/Contents" in page:
            content_object = page["/Contents"].getObject()
            content = ContentStream(content_object, source)

            # Loop over all PDF drawing operations on this page.
            for operands, operator in content.operations:
                if operator != show_text_array:
                    continue
                # Skip malformed or empty TJ operations rather than crashing
                # on operands[0][0].
                if not operands or not operands[0]:
                    continue
                text = operands[0][0]
                if isinstance(text, TextStringObject) and text.startswith(wm_text):
                    # TJ takes an array operand, so the replacement string is
                    # wrapped in an array to keep the content stream valid.
                    operands[0] = ArrayObject([TextStringObject(replace_with)])

            # Set the modified content as the content object of the page.
            page[NameObject('/Contents')] = content

        output.addPage(page)

    # Opened only after processing succeeds, so a failure above does not
    # truncate an existing output.pdf; the with-block flushes and closes it.
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)