import PyPDF2
import subprocess

# Copy every page of presentations.pdf whose text mentions "question"
# (case-insensitive) into questions.pdf.
#
# Text is extracted with the external `pdftotext` command instead of
# PyPDF2's page.extractText(), which did not work for this input. Each page
# is written to the scratch file buffer.pdf and converted to buffer.txt;
# both scratch files are overwritten on every iteration and left in the
# working directory afterwards.
#
# Raises subprocess.CalledProcessError if pdftotext exits with a non-zero
# status, and OSError if pdftotext is missing or a file cannot be opened.

# The input stays open until the output has been written: the pages added
# to `writer` still belong to `reader`, which reads from this file object.
with open('presentations.pdf', 'rb') as pdfIn:
    reader = PyPDF2.PdfFileReader(pdfIn)
    writer = PyPDF2.PdfFileWriter()

    for i in range(reader.numPages):
        page = reader.getPage(i)

        # Write this page to a pdf on its own so it can be parsed.
        bufWriter = PyPDF2.PdfFileWriter()
        bufWriter.addPage(page)
        with open('buffer.pdf', 'wb') as bufFile:
            bufWriter.write(bufFile)

        # Put the text of the page into a txt file. check_call raises on a
        # non-zero exit status, so a failed conversion can never cause the
        # previous page's buffer.txt to be read for this page.
        subprocess.check_call(['pdftotext', 'buffer.pdf', 'buffer.txt'])

        # If that .txt contains "question" then keep the page.
        # errors='replace' keeps undecodable bytes from aborting the run;
        # the keyword is plain ASCII, so the match is unaffected.
        with open('buffer.txt', 'r', encoding='utf-8',
                  errors='replace') as bufTxt:
            pageTxt = bufTxt.read()

        if "question" in pageTxt.lower():
            writer.addPage(page)

    with open('questions.pdf', 'wb') as fileOutput:
        writer.write(fileOutput)