pk00095 · December 5, 2020 16:45
diff --git a/mser_text_detection.py b/mser_text_detection.py
 import cv2
 import numpy as np
 from nms import non_max_suppression_fast

 def draw_bboxes(img, bboxes, colour=(0,255, 255)):
     """Outline every box in ``bboxes`` on ``img`` with ``cv2.rectangle``.

     Each box is unpacked as ``(x1, y1, x2, y2)``; ``(x1, y1)`` and
     ``(x2, y2)`` are handed to ``cv2.rectangle`` as the two corner
     points, drawn in ``colour`` with a thickness of 1.

     Returns the value of the last ``cv2.rectangle`` call, or ``img``
     itself when ``bboxes`` is empty.
     """
     canvas = img
     for xa, ya, xb, yb in bboxes:
         corner_a = (xa, ya)
         corner_b = (xb, yb)
         canvas = cv2.rectangle(canvas, corner_a, corner_b, colour, 1)
     return canvas

 def letter_rects(img, bboxes):
     """Build a binary mask with every bounding box filled in.

     Args:
         img: image array; only its first two dimensions are used, as
             the mask size. Both 2-D (grayscale) and 3-D (colour)
             arrays are accepted.
         bboxes: iterable of ``(x1, y1, x2, y2)`` integer boxes. Rows
             ``y1:y2`` and columns ``x1:x2`` of the mask are filled.

     Returns:
         ``uint8`` array of shape ``img.shape[:2]`` holding 255 inside
         the boxes and 0 everywhere else.
     """
     # shape[:2] instead of shape[:-1]: the latter drops the width of a
     # 2-D grayscale image and would produce a 1-D mask.
     mask = np.zeros(img.shape[:2], dtype=np.uint8)
     for x1, y1, x2, y2 in bboxes:
         mask[y1:y2, x1:x2] = 255
     return mask

 def letter_to_words(img):
     """Stretch the blobs of a mask sideways with three dilation passes.

     A copy of ``img`` is dilated twice with a 1x5 all-ones kernel and
     the result once more with a 1x2 all-ones kernel. Both kernels are a
     single row high, so blobs only grow along the horizontal axis; the
     intent is to bridge neighbouring letters, since text is laid out
     horizontally.

     Returns the dilated mask; ``img`` itself is not passed to
     ``cv2.dilate`` (a copy is).
     """
     wide_kernel = np.ones((1,5), np.uint8)
     narrow_kernel = np.ones((1,2), np.uint8)

     widened = cv2.dilate(img.copy(), wide_kernel, iterations=2)
     return cv2.dilate(widened, narrow_kernel, iterations=1)

 def detect_words(filepath):
     """Detect word-level bounding boxes in an image using MSER regions.

     The image is resized to 640x480, MSER regions are detected on its
     grayscale version, overlapping region boxes are pruned with
     ``non_max_suppression_fast``, the surviving boxes are painted into
     a mask, the mask is dilated horizontally by ``letter_to_words``,
     and the bounding rectangle of every contour of that mask is
     reported.

     Args:
         filepath: path of the image file, passed to ``cv2.imread``.

     Returns:
         Tuple ``(vis, final_bboxes, h_ratio, w_ratio)``:
         ``vis`` is a copy of the resized image, ``final_bboxes`` is a
         list of ``[x1, y1, x2, y2]`` boxes in resized-image
         coordinates, ``h_ratio`` is the original height divided by 480
         and ``w_ratio`` the original width divided by 640.

     Raises:
         IOError: if ``cv2.imread`` could not load ``filepath``.
     """
     img = cv2.imread(filepath)
     if img is None:
         # cv2.imread reports failure by returning None rather than raising.
         raise IOError("could not read image: {!r}".format(filepath))

     H, W = img.shape[:2]
     img = cv2.resize(img, (640,480))

     h_ratio = H/480
     w_ratio = W/640

     # Convert to gray scale. Detection runs on this plain grayscale
     # image (no contrast enhancement is applied).
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

     vis = img.copy()

     # detect regions in gray scale image
     mser = cv2.MSER_create()
     _regions, bboxes = mser.detectRegions(gray)

     if len(bboxes) == 0:
         # Nothing detected: skip the column arithmetic below, which
         # needs a non-empty 2-D array of boxes.
         return vis, [], h_ratio, w_ratio

     # Boxes arrive as (x, y, width, height); turn them into corners.
     bboxes = np.asarray(bboxes)
     # width --> x2
     bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
     # height --> y2
     bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]

     bboxes = non_max_suppression_fast(bboxes, overlapThresh=0.15)

     # mask -> blob -> contour -> bboxes
     mask = np.zeros(gray.shape[:2], dtype=np.uint8)
     for x1, y1, x2, y2 in bboxes:
         mask[y1:y2, x1:x2] = 255

     mask = letter_to_words(img=mask)

     # [-2:] keeps (contours, hierarchy) whether findContours returns
     # two values (OpenCV 4.x) or three (OpenCV 3.x).
     contours, _hier = cv2.findContours(
         mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

     final_bboxes = []
     for c in contours:
         contours_poly = cv2.approxPolyDP(c, 3, True)
         x, y, w, h = cv2.boundingRect(contours_poly)
         final_bboxes.append([x, y, x+w, y+h])

     return vis, final_bboxes, h_ratio, w_ratio

 # Placeholder input path; point this at a real image before running.
 filepath = 'path/to/file'

 # Run the detector. The two ratios are returned by detect_words but are
 # not used further in this script.
 word_detections, bboxes, h_ratio, w_ratio = detect_words(filepath)
 # Outline each detected box in black on the image returned above.
 word_detections = draw_bboxes(word_detections, bboxes, (0,0,0))

 # Show the annotated image and wait for a key press.
 cv2.imshow("text only", word_detections)
 cv2.waitKey(0)
diff --git a/nms.py b/nms.py
 # import the necessary packages
 import numpy as np
 # Malisiewicz et al.
 def non_max_suppression_fast(boxes, overlapThresh):
     """Prune overlapping boxes with greedy non-maximum suppression.

     Boxes are visited in order of decreasing bottom edge (``y2``). Each
     visited box is kept, and every not-yet-visited box whose
     intersection with it covers more than ``overlapThresh`` of that
     other box's own area is discarded. (Malisiewicz et al.)

     Args:
         boxes: array-like of shape ``(N, 4)`` with rows
             ``(x1, y1, x2, y2)``. Lists and integer arrays of either
             signedness are accepted.
         overlapThresh: overlap ratio above which a box is suppressed.

     Returns:
         ``[]`` when ``boxes`` is empty; otherwise an integer ndarray
         holding the kept boxes in the order they were picked.
     """
     # if there are no boxes, return an empty list
     if len(boxes) == 0:
         return []

     boxes = np.asarray(boxes)
     # Work in floating point for every integer dtype: the overlap is a
     # ratio, and unsigned arithmetic would wrap around in
     # ``xx2 - xx1 + 1`` for disjoint boxes and fake a huge overlap.
     if boxes.dtype.kind in ("i", "u"):
         boxes = boxes.astype("float")

     # indexes of the boxes that survive
     pick = []

     # grab the coordinates of the bounding boxes
     x1 = boxes[:, 0]
     y1 = boxes[:, 1]
     x2 = boxes[:, 2]
     y2 = boxes[:, 3]

     # compute the area of the bounding boxes and sort the bounding
     # boxes by the bottom-right y-coordinate of the bounding box
     area = (x2 - x1 + 1) * (y2 - y1 + 1)
     idxs = np.argsort(y2)

     while len(idxs) > 0:
         # take the box with the largest remaining y2 and keep it
         last = len(idxs) - 1
         i = idxs[last]
         pick.append(i)
         rest = idxs[:last]

         # intersection of the picked box with every remaining box
         xx1 = np.maximum(x1[i], x1[rest])
         yy1 = np.maximum(y1[i], y1[rest])
         xx2 = np.minimum(x2[i], x2[rest])
         yy2 = np.minimum(y2[i], y2[rest])

         # width and height of the intersection, clamped at zero
         w = np.maximum(0, xx2 - xx1 + 1)
         h = np.maximum(0, yy2 - yy1 + 1)

         # fraction of each remaining box covered by the picked box
         overlap = (w * h) / area[rest]

         # drop the picked box and everything it overlaps too strongly
         idxs = rest[~(overlap > overlapThresh)]

     # return only the bounding boxes that were picked using the
     # integer data type
     return boxes[pick].astype("int")
	import cv2
	import numpy as np
	from nms import non_max_suppression_fast

	def draw_bboxes(img, bboxes, colour=(0,255, 255)):
		"""Outline every box in ``bboxes`` on ``img`` with ``cv2.rectangle``.

		Each box is unpacked as ``(x1, y1, x2, y2)``; ``(x1, y1)`` and
		``(x2, y2)`` are handed to ``cv2.rectangle`` as the two corner
		points, drawn in ``colour`` with a thickness of 1.

		Returns the value of the last ``cv2.rectangle`` call, or ``img``
		itself when ``bboxes`` is empty.
		"""
		for x1, y1, x2, y2 in bboxes:
			img = cv2.rectangle(img, (x1, y1), (x2, y2), colour, 1)
		return img

	def letter_rects(img, bboxes):
		"""Build a binary mask with every bounding box filled in.

		Args:
			img: image array; only its first two dimensions are used, as
				the mask size. Both 2-D (grayscale) and 3-D (colour)
				arrays are accepted.
			bboxes: iterable of ``(x1, y1, x2, y2)`` integer boxes. Rows
				``y1:y2`` and columns ``x1:x2`` of the mask are filled.

		Returns:
			``uint8`` array of shape ``img.shape[:2]`` holding 255 inside
			the boxes and 0 everywhere else.
		"""
		# shape[:2] instead of shape[:-1]: the latter drops the width of a
		# 2-D grayscale image and would produce a 1-D mask.
		mask = np.zeros(img.shape[:2], dtype=np.uint8)
		for x1, y1, x2, y2 in bboxes:
			mask[y1:y2, x1:x2] = 255
		return mask

	def letter_to_words(img):
		"""Stretch the blobs of a mask sideways with three dilation passes.

		A copy of ``img`` is dilated twice with a 1x5 all-ones kernel and
		the result once more with a 1x2 all-ones kernel. Both kernels are a
		single row high, so blobs only grow along the horizontal axis; the
		intent is to bridge neighbouring letters, since text is laid out
		horizontally.

		Returns the dilated mask.
		"""
		# first pass: 1x5 kernel applied twice
		kernel_1x5 = np.ones((1,5), np.uint8)
		dilated = cv2.dilate(img.copy(), kernel_1x5, iterations=2)

		# second pass: 1x2 kernel applied once
		kernel_1x2 = np.ones((1,2), np.uint8)
		dilated = cv2.dilate(dilated, kernel_1x2, iterations=1)

		return dilated

	def detect_words(filepath):
		"""Detect word-level bounding boxes in an image using MSER regions.

		The image is resized to 640x480, MSER regions are detected on its
		grayscale version, overlapping region boxes are pruned with
		``non_max_suppression_fast``, the surviving boxes are painted into
		a mask, the mask is dilated horizontally by ``letter_to_words``,
		and the bounding rectangle of every contour of that mask is
		reported.

		Args:
			filepath: path of the image file, passed to ``cv2.imread``.

		Returns:
			Tuple ``(vis, final_bboxes, h_ratio, w_ratio)``:
			``vis`` is a copy of the resized image, ``final_bboxes`` is a
			list of ``[x1, y1, x2, y2]`` boxes in resized-image
			coordinates, ``h_ratio`` is the original height divided by 480
			and ``w_ratio`` the original width divided by 640.

		Raises:
			IOError: if ``cv2.imread`` could not load ``filepath``.
		"""
		img = cv2.imread(filepath)
		if img is None:
			# cv2.imread reports failure by returning None rather than raising.
			raise IOError("could not read image: {!r}".format(filepath))

		H, W = img.shape[:2]
		img = cv2.resize(img, (640,480))

		h_ratio = H/480
		w_ratio = W/640

		# Convert to gray scale. Detection runs on this plain grayscale
		# image (no contrast enhancement is applied).
		gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

		vis = img.copy()

		# detect regions in gray scale image
		mser = cv2.MSER_create()
		_regions, bboxes = mser.detectRegions(gray)

		if len(bboxes) == 0:
			# Nothing detected: skip the column arithmetic below, which
			# needs a non-empty 2-D array of boxes.
			return vis, [], h_ratio, w_ratio

		# Boxes arrive as (x, y, width, height); turn them into corners.
		bboxes = np.asarray(bboxes)
		# width --> x2
		bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
		# height --> y2
		bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]

		bboxes = non_max_suppression_fast(bboxes, overlapThresh=0.15)

		# mask -> blob -> contour -> bboxes
		mask = np.zeros(gray.shape[:2], dtype=np.uint8)
		for x1, y1, x2, y2 in bboxes:
			mask[y1:y2, x1:x2] = 255

		mask = letter_to_words(img=mask)

		# [-2:] keeps (contours, hierarchy) whether findContours returns
		# two values (OpenCV 4.x) or three (OpenCV 3.x).
		contours, _hier = cv2.findContours(
			mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

		final_bboxes = []
		for c in contours:
			contours_poly = cv2.approxPolyDP(c, 3, True)
			x, y, w, h = cv2.boundingRect(contours_poly)
			final_bboxes.append([x, y, x+w, y+h])

		return vis, final_bboxes, h_ratio, w_ratio

	# Placeholder input path; point this at a real image before running.
	filepath = 'path/to/file'

	# Run the detector. The two ratios are returned by detect_words but are
	# not used further in this script.
	word_detections, bboxes, h_ratio, w_ratio = detect_words(filepath)
	# Outline each detected box in black on the image returned above.
	word_detections = draw_bboxes(word_detections, bboxes, (0,0,0))

	# Show the annotated image and wait for a key press.
	cv2.imshow("text only", word_detections)
	cv2.waitKey(0)
	# import the necessary packages
	import numpy as np
	# Malisiewicz et al.
	def non_max_suppression_fast(boxes, overlapThresh):
		"""Prune overlapping boxes with greedy non-maximum suppression.

		Boxes are visited in order of decreasing bottom edge (``y2``). Each
		visited box is kept, and every not-yet-visited box whose
		intersection with it covers more than ``overlapThresh`` of that
		other box's own area is discarded. (Malisiewicz et al.)

		Args:
			boxes: array-like of shape ``(N, 4)`` with rows
				``(x1, y1, x2, y2)``. Lists and integer arrays of either
				signedness are accepted.
			overlapThresh: overlap ratio above which a box is suppressed.

		Returns:
			``[]`` when ``boxes`` is empty; otherwise an integer ndarray
			holding the kept boxes in the order they were picked.
		"""
		# if there are no boxes, return an empty list
		if len(boxes) == 0:
			return []

		boxes = np.asarray(boxes)
		# Work in floating point for every integer dtype: the overlap is a
		# ratio, and unsigned arithmetic would wrap around in
		# ``xx2 - xx1 + 1`` for disjoint boxes and fake a huge overlap.
		if boxes.dtype.kind in ("i", "u"):
			boxes = boxes.astype("float")

		# indexes of the boxes that survive
		pick = []

		# grab the coordinates of the bounding boxes
		x1 = boxes[:, 0]
		y1 = boxes[:, 1]
		x2 = boxes[:, 2]
		y2 = boxes[:, 3]

		# compute the area of the bounding boxes and sort the bounding
		# boxes by the bottom-right y-coordinate of the bounding box
		area = (x2 - x1 + 1) * (y2 - y1 + 1)
		idxs = np.argsort(y2)

		while len(idxs) > 0:
			# take the box with the largest remaining y2 and keep it
			last = len(idxs) - 1
			i = idxs[last]
			pick.append(i)
			rest = idxs[:last]

			# intersection of the picked box with every remaining box
			xx1 = np.maximum(x1[i], x1[rest])
			yy1 = np.maximum(y1[i], y1[rest])
			xx2 = np.minimum(x2[i], x2[rest])
			yy2 = np.minimum(y2[i], y2[rest])

			# width and height of the intersection, clamped at zero
			w = np.maximum(0, xx2 - xx1 + 1)
			h = np.maximum(0, yy2 - yy1 + 1)

			# fraction of each remaining box covered by the picked box
			overlap = (w * h) / area[rest]

			# drop the picked box and everything it overlaps too strongly
			idxs = rest[~(overlap > overlapThresh)]

		# return only the bounding boxes that were picked using the
		# integer data type
		return boxes[pick].astype("int")