#import wget
import textract

# url = 'https://www.utcourts.gov/cal/data/SLC_Calendar.pdf'

# pdf = wget.download(url)
# Tokens identifying the page header that opens the calendar section to extract.
_COURT = "3RD"
_JUDGE = "BERNARDS-GOODMAN"
_HEARING_DATE = ("September", "29", "2017")


def _token(rows, row, col):
    """Return rows[row][col], or None when either index is out of range.

    Text extracted from a PDF contains blank and short lines, so every
    look-ahead has to tolerate rows that are missing or have too few tokens.
    """
    if 0 <= row < len(rows) and 0 <= col < len(rows[row]):
        return rows[row][col]
    return None


def _is_section_start(rows, row):
    """Return True when `row` is the page header opening the wanted section.

    The header is recognised by a "Page" token anywhere on the row, the court
    and judge tokens two and three rows below it, and the hearing date seven
    rows below it.
    """
    if "Page" not in rows[row]:
        return False
    if _token(rows, row + 2, 1) != _COURT or _token(rows, row + 3, 1) != _JUDGE:
        return False
    # Tokens are strings, so the date must be compared against strings.
    # NOTE(review): a trailing comma is stripped on the assumption that the
    # date is rendered like "September 29, 2017" -- confirm against the PDF.
    date = tuple((_token(rows, row + 7, col) or "").rstrip(",") for col in range(3))
    return date == _HEARING_DATE


print("Processing pdf into text...")
pdf_text_raw = textract.process("SLC_Calendar.pdf")  # Extract all text from the PDF
if not isinstance(pdf_text_raw, str):
    # On Python 3 the extracted text arrives as bytes; decode it so the string
    # comparisons below can match. On Python 2 this branch is never taken.
    pdf_text_raw = pdf_text_raw.decode("utf-8")
print("Formatting...")
pdf_text_lines = pdf_text_raw.splitlines()  # One entry per line of text
print("Structuring data...")
pdf_text_array = [line.split() for line in pdf_text_lines]  # Rows of whitespace-separated tokens
print("Searching for start of entries...")
# Take the FIRST matching header row; the index-based scan is needed because
# the match looks ahead at following rows.
start = next(
    (row for row in range(len(pdf_text_array))
     if _is_section_start(pdf_text_array, row)),
    None,
)
if start is None:
    raise SystemExit("Could not find the start of the entries for %s" % _JUDGE)
print("Found start position %s" % start)

# Scan forward from the row after the start header for the first page header
# that is not followed by the judge's name; that row is the exclusive end.
# NOTE(review): this test expects "Page" as the 2nd token and the judge on the
# very next row, whereas the start test accepts "Page" anywhere and expects the
# judge three rows down. The original offsets are kept -- confirm which layout
# the PDF really has.
end = None
for pos in range(start + 1, len(pdf_text_array)):
    if (_token(pdf_text_array, pos, 1) == "Page"
            and _token(pdf_text_array, pos + 1, 1) != _JUDGE):
        end = pos
        break
if end is None:
    # No later header for another judge: the section runs to the end of the text.
    end = len(pdf_text_array)
    print("No end marker found; using end of document %s" % end)
else:
    print("Found end position %s" % end)