AhmedSamara · December 15, 2018 20:21 · Dec 15, 2018
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1,42 @@
+import PyPDF2
+import subprocess
+
+pdfIn = open('presentations.pdf', 'rb')
+
+reader = PyPDF2.PdfFileReader(pdfIn)
+
+writer = PyPDF2.PdfFileWriter()
+
+for i in range(reader.numPages):
+    page = reader.getPage(i)
+
+    # This didn't work so using others instead.
+    # contents = page.extractText()
+
+    # get this page as it's own file so it can be parsed.
+    bufWriter = PyPDF2.PdfFileWriter()
+    bufWriter.addPage(page)
+
+    # Write this page to a pdf on its own to parse.
+    bufFile = open('buffer.pdf', 'wb')
+    bufWriter.write(bufFile)
+    bufFile.close()
+
+    # Put the text of the page into a txt file.
+    subprocess.call(['pdftotext', 'buffer.pdf', 'buffer.txt'])
+
+    # If that .txt contains "question" then write.
+    with open('buffer.txt', 'r') as bufTxt:
+        pageTxt = bufTxt.read()
+        if "question" in pageTxt.lower():
+            writer.addPage(page)
+
+
+
+
+fileOutput = open('questions.pdf', 'wb')
+
+writer.write(fileOutput)
+
+pdfIn.close()
+fileOutput.close()