Created
December 5, 2020 16:45
-
-
Save pk00095/f54a7fcba0e5fbbdd6ad019653fd5f82 to your computer and use it in GitHub Desktop.
Detection of text using pure image processing with OpenCV in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import cv2 | |
| import numpy as np | |
| from nms import non_max_suppression_fast | |
def draw_bboxes(img, bboxes, colour=(0, 255, 255)):
    """Outline every bounding box on ``img`` with a 1-pixel rectangle.

    Args:
        img: Image handed to ``cv2.rectangle``.
        bboxes: Iterable of ``(x1, y1, x2, y2)`` corner coordinates.
        colour: Colour tuple passed straight through to ``cv2.rectangle``.

    Returns:
        The value returned by the last ``cv2.rectangle`` call, or ``img``
        itself when ``bboxes`` is empty.
    """
    canvas = img
    for left, top, right, bottom in bboxes:
        top_left = (left, top)
        bottom_right = (right, bottom)
        canvas = cv2.rectangle(canvas, top_left, bottom_right, colour, 1)
    return canvas
def letter_rects(img, bboxes):
    """Build a binary mask with every bounding box filled in white.

    Args:
        img: Reference image; only its first two dimensions (height, width)
            are used, so both colour (H, W, C) and grayscale (H, W) arrays
            are accepted.
        bboxes: Iterable of ``(x1, y1, x2, y2)`` integer corner coordinates.

    Returns:
        A ``uint8`` array of shape (H, W) that is 255 inside the boxes and 0
        everywhere else.
    """
    # shape[:2] instead of shape[:-1]: the latter collapses a 2-D grayscale
    # image to a 1-D mask, which then fails on the 2-D slice assignment.
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    for x1, y1, x2, y2 in bboxes:
        mask[y1:y2, x1:x2] = 255
    return mask
def letter_to_words(img):
    """Dilate a letter mask horizontally.

    Text runs left to right, so the mask is dilated with one-row kernels
    only: a 1x5 kernel applied twice, followed by a 1x2 kernel applied once.

    Args:
        img: Mask passed to ``cv2.dilate``; a copy is dilated, the argument
            itself is not handed to OpenCV.

    Returns:
        The dilated mask.
    """
    # (kernel shape, number of iterations), applied in this order.
    dilation_passes = (((1, 5), 2), ((1, 2), 1))

    merged = img.copy()
    for kernel_shape, repeats in dilation_passes:
        kernel = np.ones(kernel_shape, np.uint8)
        merged = cv2.dilate(merged, kernel, iterations=repeats)
    return merged
def detect_words(filepath, target_size=(640, 480)):
    """Detect word-level bounding boxes in an image with MSER and morphology.

    Pipeline: load the image, resize it, convert to grayscale, detect MSER
    regions, thin them out with non-maximum suppression, paint the surviving
    boxes into a binary mask, dilate the mask horizontally so neighbouring
    letters merge, and take the bounding rectangle of each resulting contour.

    Args:
        filepath: Path of the image to read with ``cv2.imread``.
        target_size: ``(width, height)`` the image is resized to before
            detection. Defaults to ``(640, 480)``.

    Returns:
        A tuple ``(vis, final_bboxes, h_ratio, w_ratio)`` where ``vis`` is a
        copy of the resized image, ``final_bboxes`` is a list of
        ``[x1, y1, x2, y2]`` boxes in resized-image coordinates, and
        ``h_ratio`` / ``w_ratio`` are the original height / width divided by
        the resized height / width.

    Raises:
        OSError: If ``cv2.imread`` could not load ``filepath``.
    """
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("could not read image: {!r}".format(filepath))

    orig_h, orig_w = img.shape[:2]
    target_w, target_h = target_size
    img = cv2.resize(img, (target_w, target_h))
    h_ratio = orig_h / target_h
    w_ratio = orig_w / target_w

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    vis = img.copy()
    # NOTE(review): a CLAHE-equalised copy of `gray` used to be computed here
    # but was never used; detection has always run on the plain grayscale
    # image, so the dead computation was removed.

    mser = cv2.MSER_create()
    _, bboxes = mser.detectRegions(gray)

    # Normalise to an (N, 4) integer array so the no-regions case does not
    # break the column arithmetic below.
    bboxes = np.array(bboxes, dtype=int).reshape(-1, 4)
    # Convert (x, y, w, h) to (x1, y1, x2, y2).
    bboxes[:, 2] += bboxes[:, 0]
    bboxes[:, 3] += bboxes[:, 1]
    bboxes = non_max_suppression_fast(bboxes, overlapThresh=0.15)

    # Paint every surviving letter box into a binary mask.
    mask = np.zeros(gray.shape[:2], dtype=np.uint8)
    for x1, y1, x2, y2 in bboxes:
        mask[y1:y2, x1:x2] = 255

    # Merge neighbouring letters into word-sized blobs.
    mask = letter_to_words(img=mask)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 4.x; index -2 is the contour list on both.
    contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    final_bboxes = []
    for contour in contours:
        polygon = cv2.approxPolyDP(contour, 3, True)
        x, y, w, h = cv2.boundingRect(polygon)
        final_bboxes.append([x, y, x + w, y + h])

    return vis, final_bboxes, h_ratio, w_ratio
# Demo: detect words in one image and show the boxes until a key is pressed.
filepath = 'path/to/file'
word_detections, bboxes, h_ratio, w_ratio = detect_words(filepath)
word_detections = draw_bboxes(
    img=word_detections,
    bboxes=bboxes,
    colour=(0, 0, 0),
)
cv2.imshow("text only", word_detections)
cv2.waitKey(0)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import the necessary packages | |
| import numpy as np | |
| # Malisiewicz et al. | |
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression (Malisiewicz et al.).

    Boxes are visited from the largest bottom-right y-coordinate downwards.
    Each visited box is kept, and every remaining box whose intersection with
    it covers more than ``overlapThresh`` of that remaining box's own area is
    discarded.

    Args:
        boxes: Array-like of shape (N, 4) holding ``(x1, y1, x2, y2)`` rows.
        overlapThresh: Overlap ratio above which a box is suppressed.

    Returns:
        An empty list when ``boxes`` is empty; otherwise an integer array
        with the kept boxes, in the order they were picked.
    """
    # accept plain lists / tuples as well as arrays
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes are integers (signed or unsigned), convert them
    # to floats -- the overlap is a ratio, and unsigned arithmetic would wrap
    # around on the negative widths/heights of non-overlapping boxes
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # compute the width and height of the intersection
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap relative to each remaining box
        overlap = (w * h) / area[rest]

        # drop the picked index and every index whose overlap exceeds the
        # threshold; the survivors keep their relative order
        idxs = rest[~(overlap > overlapThresh)]

    # return only the bounding boxes that were picked, using the
    # integer data type
    return boxes[pick].astype("int")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment