Skip to content

Instantly share code, notes, and snippets.

@pk00095
Created December 5, 2020 16:45
Show Gist options
  • Save pk00095/f54a7fcba0e5fbbdd6ad019653fd5f82 to your computer and use it in GitHub Desktop.
Save pk00095/f54a7fcba0e5fbbdd6ad019653fd5f82 to your computer and use it in GitHub Desktop.
Detection of text using pure image processing with OpenCV in Python
import cv2
import numpy as np
from nms import non_max_suppression_fast
def draw_bboxes(img, bboxes, colour=(0, 255, 255), thickness=1):
    """Draw rectangle outlines for every box in *bboxes* onto *img*.

    Args:
        img: image passed to ``cv2.rectangle``.
        bboxes: iterable of ``(x1, y1, x2, y2)`` corner coordinates.
        colour: colour tuple handed to ``cv2.rectangle`` unchanged.
        thickness: outline thickness in pixels (defaults to 1).

    Returns:
        The image returned by the last ``cv2.rectangle`` call, or *img*
        itself when *bboxes* is empty.
    """
    for x1, y1, x2, y2 in bboxes:
        # Coerce to plain Python ints so NumPy integer or float coordinates
        # are accepted as points by cv2.rectangle.
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        img = cv2.rectangle(img, top_left, bottom_right, colour, thickness)
    return img
def letter_rects(img, bboxes):
    """Build a binary mask with every bounding box filled in.

    Args:
        img: image whose first two dimensions (rows, columns) give the mask
            size; both grayscale (2-D) and multi-channel (3-D) arrays work.
        bboxes: iterable of ``(x1, y1, x2, y2)`` boxes; ``x2``/``y2`` are
            exclusive, as in NumPy slicing.

    Returns:
        A ``uint8`` array of shape ``img.shape[:2]`` holding 255 inside the
        boxes and 0 elsewhere.
    """
    # shape[:2] rather than shape[:-1]: the latter yields a 1-D shape for a
    # grayscale image and breaks the 2-D slice assignment below.
    height, width = img.shape[:2]
    mask = np.zeros((height, width), dtype=np.uint8)
    for x1, y1, x2, y2 in bboxes:
        # Clamp at zero: a negative bound would be read as "from the end"
        # by the slice and silently paint the wrong (usually empty) region.
        left, top = max(int(x1), 0), max(int(y1), 0)
        right, bottom = max(int(x2), 0), max(int(y2), 0)
        mask[top:bottom, left:right] = 255
    return mask
def letter_to_words(img):
    """Smear a letter mask horizontally so neighbouring letters merge.

    The mask is dilated twice with a 1x5 kernel and then once with a 1x2
    kernel. Both kernels are a single row high, so growth is horizontal
    only.

    Args:
        img: mask image to dilate; it is not modified.

    Returns:
        The dilated mask produced by ``cv2.dilate``.
    """
    # Kernels are one row high (1x5 and 1x2), not square.
    wide_kernel = np.ones((1, 5), np.uint8)
    narrow_kernel = np.ones((1, 2), np.uint8)
    # cv2.dilate writes into a fresh output array when no dst is given, so
    # the input does not need to be copied first.
    merged = cv2.dilate(img, wide_kernel, iterations=2)
    return cv2.dilate(merged, narrow_kernel, iterations=1)
def detect_words(filepath, target_size=(640, 480)):
    """Detect word-like regions in an image using MSER and dilation.

    The image is resized to *target_size*, MSER regions are found on its
    grayscale version, overlapping boxes are pruned with
    ``non_max_suppression_fast``, the surviving boxes are painted into a
    mask, the mask is dilated by ``letter_to_words``, and the bounding
    rectangle of every contour in the mask is returned.

    Args:
        filepath: path of the image to load with ``cv2.imread``.
        target_size: ``(width, height)`` the image is resized to before
            detection; defaults to ``(640, 480)``.

    Returns:
        A tuple ``(vis, final_bboxes, h_ratio, w_ratio)``:
        ``vis`` is a copy of the resized image, ``final_bboxes`` is a list
        of ``[x1, y1, x2, y2]`` boxes in resized-image coordinates, and
        ``h_ratio`` / ``w_ratio`` are original size divided by resized size
        (multiply by them to map boxes back to the original image).

    Raises:
        OSError: if ``cv2.imread`` could not load *filepath*.
    """
    img = cv2.imread(filepath)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise OSError("cannot read image file: %r" % (filepath,))

    orig_h, orig_w = img.shape[:2]
    target_w, target_h = target_size
    img = cv2.resize(img, (target_w, target_h))
    h_ratio = orig_h / target_h
    w_ratio = orig_w / target_w

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    vis = img.copy()

    # NOTE: MSER runs on the plain grayscale image. The CLAHE contrast
    # enhancement this function used to compute was never fed into the
    # detector, so the dead computation is omitted.
    mser = cv2.MSER_create()
    _regions, bboxes = mser.detectRegions(gray)

    # Normalise to an (N, 4) integer array so the column arithmetic below
    # also works when nothing was detected.
    bboxes = np.asarray(bboxes, dtype=np.int64).reshape(-1, 4)
    # Convert (x, y, width, height) into (x1, y1, x2, y2).
    bboxes[:, 2] += bboxes[:, 0]
    bboxes[:, 3] += bboxes[:, 1]
    bboxes = non_max_suppression_fast(bboxes, overlapThresh=0.15)

    # mask -> blob -> contour -> bboxes
    mask = np.zeros(gray.shape[:2], dtype=np.uint8)
    for x1, y1, x2, y2 in bboxes:
        mask[y1:y2, x1:x2] = 255
    mask = letter_to_words(img=mask)

    # [-2] picks the contour list whether findContours returns two values
    # (OpenCV 2.x / 4.x) or three (OpenCV 3.x).
    contours = cv2.findContours(
        mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    final_bboxes = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(cv2.approxPolyDP(contour, 3, True))
        final_bboxes.append([x, y, x + w, y + h])
    return vis, final_bboxes, h_ratio, w_ratio
# Demo: detect words in one image and show the boxes in a window.
# Guarded so that importing this module does not run detection or open a
# GUI window; running the file as a script behaves as before.
if __name__ == "__main__":
    filepath = 'path/to/file'  # placeholder: point this at a real image
    word_detections, bboxes, h_ratio, w_ratio = detect_words(filepath)
    word_detections = draw_bboxes(word_detections, bboxes, (0, 0, 0))
    cv2.imshow("text only", word_detections)
    cv2.waitKey(0)  # block until a key is pressed
    cv2.destroyAllWindows()
# import the necessary packages
import numpy as np
# Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh):
    """Prune overlapping boxes with greedy non-maximum suppression.

    Boxes are visited in order of decreasing bottom edge (``y2``). Each
    visited box is kept, and every remaining box whose intersection with it
    covers more than *overlapThresh* of that remaining box's own area is
    discarded.

    Args:
        boxes: array-like of shape ``(N, 4)`` with rows
            ``(x1, y1, x2, y2)``; a NumPy array or a nested list.
        overlapThresh: overlap ratio above which a box is suppressed.

    Returns:
        An integer array with the kept boxes, in the order they were
        picked, or an empty list when *boxes* is empty.
    """
    # Accept plain lists as well as arrays.
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if boxes.size == 0:
        return []

    # If the boxes are integers (signed OR unsigned), convert them to
    # floats: the overlap ratio needs true division, and unsigned
    # subtraction would wrap around instead of going negative.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # indexes of the boxes that survive
    pick = []

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # take the last index (largest y2) and keep that box
        current = idxs[-1]
        rest = idxs[:-1]
        pick.append(current)

        # intersection rectangle between the current box and every
        # remaining box: largest start corner, smallest end corner
        xx1 = np.maximum(x1[current], x1[rest])
        yy1 = np.maximum(y1[current], y1[rest])
        xx2 = np.minimum(x2[current], x2[rest])
        yy2 = np.minimum(y2[current], y2[rest])

        # width and height of the intersection (zero when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # ratio of the intersection to each remaining box's own area
        overlap = (w * h) / area[rest]

        # drop the current index and every remaining index whose overlap
        # exceeds the threshold
        idxs = rest[np.logical_not(overlap > overlapThresh)]

    # return only the bounding boxes that were picked, using the
    # integer data type
    return boxes[pick].astype("int")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment