"""Report which checkbox rows look marked on every page of a PDF form.

Each page is rendered to an image, a fixed column of the page is cropped,
binarised and dilated, and the column is then split into equal-height rows.
Rows whose ink density stands out from the emptiest row are reported.
"""
import argparse

import cv2
import numpy as np
from pdf2image import convert_from_path

# Resolution passed to pdf2image; the pixel offsets below index into pages
# rendered at this DPI.
RENDER_DPI = 500

# Region of the rendered page that is inspected, in pixels.
CROP_TOP = 610
CROP_HEIGHT = 1630
CROP_LEFT = 440
CROP_WIDTH = 110

# Layout of the rows inside the cropped column.  Note that
# ROW_COUNT * ROW_HEIGHT (1600) is smaller than CROP_HEIGHT (1630), so the
# bottom 30 pixel rows of the crop are never examined.
ROW_COUNT = 10
ROW_HEIGHT = 160


def dilate_image(image):
    """Return a dilated binary mask of the inspected column of *image*.

    Args:
        image: Page image as a 3-channel array in OpenCV's BGR channel order.

    Returns:
        A single-channel ``uint8`` mask of the cropped column in which ink is
        non-zero (255) and background is 0.
    """
    crop = image[
        CROP_TOP:CROP_TOP + CROP_HEIGHT,
        CROP_LEFT:CROP_LEFT + CROP_WIDTH,
    ]
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)
    # Inverted Otsu threshold: dark ink becomes white so it can be counted
    # with countNonZero later on.
    thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    # ksize is (width, height): a 3x13 rectangle grows marks mostly vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 13))
    return cv2.dilate(thresh, kernel, iterations=1)


def detect_values(dilate, deviance=1.1, *, rows=ROW_COUNT, row_height=ROW_HEIGHT):
    """Return the 1-based numbers of the rows that look marked.

    The mask is cut into ``rows`` horizontal bands of ``row_height`` pixels,
    starting at the top.  For each band the fraction of non-zero pixels is
    computed; a band is reported when that fraction differs from the smallest
    fraction by more than ``deviance`` standard deviations of all fractions.

    Args:
        dilate: Single-channel binary mask as produced by ``dilate_image``.
        deviance: Multiplier applied to the standard deviation to obtain the
            detection threshold.
        rows: Number of bands to examine.
        row_height: Height of each band in pixels.

    Returns:
        A list of plain ``int`` row numbers (1-based), in ascending order.
    """
    fill_ratios = []
    for row in range(rows):
        top = row * row_height
        band = dilate[top:top + row_height, :]
        fill_ratios.append(cv2.countNonZero(band) / band.size)
    fill_ratios = np.asarray(fill_ratios)

    # The emptiest band serves as the reference level (the minimum, not the
    # arithmetic mean, is what the comparison is made against).
    baseline = fill_ratios.min()
    threshold = deviance * fill_ratios.std()
    deviant_indices = np.where(np.abs(fill_ratios - baseline) > threshold)[0]

    # Convert NumPy integers to built-in ints so that the list prints as
    # "[3, 7]" rather than "[np.int64(3), np.int64(7)]" on NumPy >= 2.
    return [int(index) + 1 for index in deviant_indices]


def main():
    """Parse the command line and print the marked rows of every PDF page."""
    parser = argparse.ArgumentParser()
    parser.add_argument("filename", help="Enter a pdf filename to process")
    args = parser.parse_args()

    pages = convert_from_path(args.filename, RENDER_DPI)
    for num, page in enumerate(pages, start=1):
        # pdf2image yields PIL images, whose arrays are RGB; OpenCV routines
        # (and dilate_image's BGR2GRAY conversion) expect BGR.
        image = cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR)
        values = detect_values(dilate_image(image))
        print(f'Page: {num} checked: {values}')


if __name__ == "__main__":
    main()