raphiz · September 28, 2015 19:52 · Sep 28, 2015
diff --git a/pdf_remove_watermark.py b/pdf_remove_watermark.py
@@ -0,0 +1,37 @@
+from PyPDF2 import PdfFileReader, PdfFileWriter
+from PyPDF2.pdf import ContentStream
+from PyPDF2.generic import TextStringObject, NameObject
+from PyPDF2.utils import b_
+
+wm_text = 'Persönliches Exemplar von'
+replace_with = ''
+
+# Remove the watermark text from every page of input.pdf, write output.pdf.
+# The input stays open until the output is written, as the writer may still
+# read page data from it; both files are closed by the `with` blocks.
+with open('input.pdf', 'rb') as input_stream:
+    source = PdfFileReader(input_stream)
+    output = PdfFileWriter()
+
+    for page_number in range(source.getNumPages()):
+        page = source.getPage(page_number)
+        # A page without a content stream has nothing to clean: copy it as is.
+        if '/Contents' in page:
+            content = ContentStream(page['/Contents'].getObject(), source)
+
+            # You might adapt this part depending on your PDF file
+            for operands, operator in content.operations:
+                # TJ takes an array of strings/offsets; skip empty arrays.
+                if operator != b_('TJ') or not operands or not operands[0]:
+                    continue
+                text = operands[0][0]
+                if isinstance(text, TextStringObject) and text.startswith(wm_text):
+                    operands[0] = TextStringObject(replace_with)
+
+            # Set the modified content as content object on the page
+            page[NameObject('/Contents')] = content
+
+        output.addPage(page)
+
+    with open('output.pdf', 'wb') as output_stream:
+        output.write(output_stream)