import cv2
import numpy as np
from pdf2image import convert_from_path
import argparse

# Command-line interface: a single required positional argument naming the
# PDF file to process.
# NOTE: these statements run at module level, so the arguments are parsed
# as soon as the module is executed or imported.
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="Enter a pdf filename to process")
# `args.filename` is read further down when the PDF is rendered to images.
args = parser.parse_args()


def dilate_image(image):
    """Build a dilated binary mask from a fixed strip of ``image``.

    The strip is 1630 rows tall and 110 columns wide, with its top-left
    corner at row 610, column 440. It is converted to grayscale with
    ``COLOR_BGR2GRAY`` (so the channels are interpreted in BGR order),
    smoothed with a 7x7 Gaussian blur, binarised with an inverted Otsu
    threshold (pixels at or below the threshold become 255, the rest 0)
    and dilated once with a rectangular (3, 13) structuring element.

    Returns the dilated single-channel mask.
    """
    # Fixed geometry of the strip that is analysed.
    top, left = 610, 440
    height, width = 1630, 110
    strip = image[top:top + height, left:left + width]

    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY), (7, 7), 0
    )

    # cv2.threshold returns (threshold_used, image); only the image is kept.
    _, binary = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 13))
    return cv2.dilate(binary, structuring_element, iterations=1)


def detect_values(dilate, deviance=1.1, *, row_height=160, num_rows=10):
    """Return the 1-based numbers of the row bands that stand out as marked.

    The mask is cut into ``num_rows`` consecutive horizontal bands of
    ``row_height`` pixels each, starting at the top edge. For every band the
    fraction of non-zero pixels is computed. A band is reported when its
    fraction exceeds the *smallest* fraction among all bands by more than
    ``deviance`` times the (population) standard deviation of the fractions.

    The minimum, not the mean, is used as the baseline, so the result does
    not flip when most of the bands are marked.

    Args:
        dilate: Mask array with at least two dimensions (rows, columns);
            expected to be a single-channel binary mask. Non-zero entries
            count as marked pixels.
        deviance: Multiplier applied to the standard deviation to obtain
            the detection threshold.
        row_height: Height of one band in pixels.
        num_rows: Number of bands to inspect.

    Returns:
        A list of plain ``int`` band numbers (1-based) in ascending order.

    Raises:
        ValueError: If a band contains no pixels, i.e. the mask does not
            reach far enough down (or has zero width) for ``num_rows`` bands.
    """
    fill_ratios = []
    for row in range(num_rows):
        top = row * row_height
        band = dilate[top:top + row_height, :]
        if band.size == 0:
            # Without this guard the ratio below would divide by zero.
            raise ValueError(
                f"row band {row + 1} is empty: mask of shape {dilate.shape} "
                f"does not cover {num_rows} bands of {row_height} px"
            )
        fill_ratios.append(np.count_nonzero(band) / band.size)

    if not fill_ratios:
        return []

    baseline = np.min(fill_ratios)
    threshold = deviance * np.std(fill_ratios)

    # Every ratio is >= baseline, so no absolute value is needed. Enumerating
    # in Python yields built-in ints rather than NumPy integer scalars.
    return [
        number
        for number, ratio in enumerate(fill_ratios, start=1)
        if ratio - baseline > threshold
    ]

# Render every page of the PDF at 500 DPI.
# NOTE(review): the pixel offsets hard-coded in dilate_image() and
# detect_values() were presumably tuned for this resolution; confirm before
# changing it.
pages = convert_from_path(args.filename, 500)

for page_number, page in enumerate(pages, start=1):
    # pdf2image yields PIL images, whose arrays are in RGB channel order,
    # whereas dilate_image() converts to grayscale with COLOR_BGR2GRAY.
    # Swap to BGR first so the red and blue grayscale weights are not mixed up.
    image = cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR)
    dilate = dilate_image(image)
    values = detect_values(dilate)
    print(f'Page: {page_number} checked: {values}')