Skip to content

Instantly share code, notes, and snippets.

@mindis
Forked from lpillmann/crop_morphology.py
Created July 10, 2017 15:57
Show Gist options
  • Save mindis/2d61a34783ba40d56a43a993b06199d3 to your computer and use it in GitHub Desktop.
Save mindis/2d61a34783ba40d56a43a993b06199d3 to your computer and use it in GitHub Desktop.

Revisions

  1. @lpillmann lpillmann revised this gist Jun 11, 2017. 1 changed file with 2 additions and 12 deletions.
    14 changes: 2 additions & 12 deletions crop_morphology.py
    Original file line number Diff line number Diff line change
    @@ -14,8 +14,6 @@
    Adapted to Python 3 by Lui Pillmann (https://github.com/luipillmann)
    '''

    #import ipdb

    import glob
    import os
    import random
    @@ -35,7 +33,6 @@ def dilate(ary, N, iterations):
    """Dilate using an NxN '+' sign shape. ary is np.uint8."""

    kernel = np.zeros((N,N), dtype=np.uint8)
    #ipdb.set_trace()
    kernel[(N-1)//2,:] = 1 # Bug solved with // (integer division)

    dilated_image = cv2.dilate(ary / 255, kernel, iterations=iterations)
    @@ -130,8 +127,6 @@ def find_components(edges, max_components=16):
    n += 1
    dilated_image = dilate(edges, N=3, iterations=n)
    dilated_image = np.uint8(dilated_image)
    #ipdb.set_trace()
    #_, contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    _, contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    count = len(contours)
    #print dilation
    @@ -247,7 +242,6 @@ def downscale_image(im, max_dim=2048):


    def process_image(path, out_path):
    #ipdb.set_trace() #reference breakpoint

    orig_im = Image.open(path)
    scale, im = downscale_image(orig_im)
    @@ -282,11 +276,7 @@ def process_image(path, out_path):
    crop = find_optimal_components_subset(contours, edges)
    crop = pad_crop(crop, contours, edges, border_contour)

    #ipdb.set_trace()

    crop = [int(x / scale) for x in crop] # upscale to the original image size.

    #ipdb.set_trace()

    #draw = ImageDraw.Draw(im)
    #c_info = props_for_contours(contours, edges)
    @@ -311,8 +301,8 @@ def process_image(path, out_path):
    files = sys.argv[1:]

    for path in files:
    #out_path = path.replace('.jpg', '.crop.png')
    out_path = path.replace('.png', '.crop.png')
    out_path = path.replace('.jpg', '.crop.png')
    #out_path = path.replace('.png', '.crop.png') # .png as input
    if os.path.exists(out_path): continue
    try:
    process_image(path, out_path)
  2. @lpillmann lpillmann created this gist Jun 11, 2017.
    320 changes: 320 additions & 0 deletions crop_morphology.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,320 @@
    #!/usr/bin/env python
    '''Crop an image to just the portions containing text.
    Usage:
    ./crop_morphology.py path/to/image.jpg
    This will place the cropped image in path/to/image.crop.png.
    For details on the methodology, see
    http://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html
    Script created by Dan Vanderkam (https://github.com/danvk)
    Adapted to Python 3 by Lui Pillmann (https://github.com/luipillmann)
    '''

    #import ipdb

    import glob
    import os
    import random
    import sys
    import random
    import math
    import json
    from collections import defaultdict

    import cv2
    from PIL import Image, ImageDraw
    import numpy as np
    from scipy.ndimage.filters import rank_filter


    def dilate(ary, N, iterations):
        """Dilate ary along rows, then along columns, with NxN line kernels.

        ary is np.uint8. It is divided by 255 before the first pass, so the
        returned array holds the scaled values rather than the original ones.
        The first pass uses a kernel whose middle row is set, the second pass a
        kernel whose middle column is set; each pass runs `iterations` times.
        """
        middle = (N - 1) // 2  # integer division: the result is used as an index

        horizontal = np.zeros((N, N), dtype=np.uint8)
        horizontal[middle, :] = 1

        vertical = np.zeros((N, N), dtype=np.uint8)
        vertical[:, middle] = 1

        result = ary / 255
        for kernel in (horizontal, vertical):
            result = cv2.dilate(result, kernel, iterations=iterations)
        return result


    def props_for_contours(contours, ary):
        """Describe each contour by its bounding box and covered pixel mass.

        Returns one dict per contour, in input order, with the inclusive
        bounding-box corners ('x1', 'y1', 'x2', 'y2') and 'sum': the sum of
        the values of ary inside the filled contour, divided by 255.
        """
        def describe(contour):
            left, top, width, height = cv2.boundingRect(contour)
            # Rasterise the filled contour so it can be used as a mask on ary.
            mask = np.zeros(ary.shape)
            cv2.drawContours(mask, [contour], 0, 255, -1)
            return {
                'x1': left,
                'y1': top,
                'x2': left + width - 1,
                'y2': top + height - 1,
                'sum': np.sum(ary * (mask > 0))/255
            }

        return [describe(contour) for contour in contours]


    def union_crops(crop1, crop2):
        """Return the smallest (x1, y1, x2, y2) rect containing both rects."""
        left_a, top_a, right_a, bottom_a = crop1
        left_b, top_b, right_b, bottom_b = crop2
        left = min(left_a, left_b)
        top = min(top_a, top_b)
        right = max(right_a, right_b)
        bottom = max(bottom_a, bottom_b)
        return left, top, right, bottom


    def intersect_crops(crop1, crop2):
        """Intersect two (x1, y1, x2, y2) rects.

        For rects that do not overlap the result has x2 < x1 and/or y2 < y1.
        """
        left_a, top_a, right_a, bottom_a = crop1
        left_b, top_b, right_b, bottom_b = crop2
        left = max(left_a, left_b)
        top = max(top_a, top_b)
        right = min(right_a, right_b)
        bottom = min(bottom_a, bottom_b)
        return left, top, right, bottom


    def crop_area(crop):
        """Area of an (x1, y1, x2, y2) rect; negative extents count as zero."""
        left, top, right, bottom = crop
        width = max(0, right - left)
        height = max(0, bottom - top)
        return width * height


    def find_border_components(contours, ary):
        """List contours whose bounding box covers more than half of ary.

        Returns (index, x1, y1, x2, y2) tuples, where index is the contour's
        position in `contours` and the corners are inclusive.
        """
        half_image = 0.5 * (ary.shape[0] * ary.shape[1])
        boxes = ((idx, cv2.boundingRect(contour))
                 for idx, contour in enumerate(contours))
        return [
            (idx, left, top, left + width - 1, top + height - 1)
            for idx, (left, top, width, height) in boxes
            if width * height > half_image
        ]


    def angle_from_right(deg):
        """Return how far deg is from the nearest multiple of 90 degrees."""
        past_multiple = deg % 90
        to_next_multiple = 90 - past_multiple
        return min(past_multiple, to_next_multiple)


    def remove_border(contour, ary):
        """Remove everything outside a border contour.

        Builds a mask that is 255 inside the border and 0 elsewhere (including
        a 4px-wide line along the border itself) and returns the element-wise
        minimum of that mask and ary.
        """
        # Use a rotated rectangle (should be a good approximation of a border).
        # If it's far from a right angle, it's probably two sides of a border and
        # we should use the bounding box instead.
        c_im = np.zeros(ary.shape)
        r = cv2.minAreaRect(contour)
        degs = r[2]
        if angle_from_right(degs) <= 10.0:
            # np.int0 was removed in NumPy 2.0; an explicit int32 cast gives
            # the integer corner coordinates drawContours needs.
            box = cv2.boxPoints(r).astype(np.int32)
            cv2.drawContours(c_im, [box], 0, 255, -1)
            cv2.drawContours(c_im, [box], 0, 0, 4)
        else:
            # boundingRect returns (x, y, width, height), not two corners, so
            # the opposite corner has to be derived before drawing.
            x, y, w, h = cv2.boundingRect(contour)
            top_left = (x, y)
            bottom_right = (x + w - 1, y + h - 1)
            cv2.rectangle(c_im, top_left, bottom_right, 255, -1)
            cv2.rectangle(c_im, top_left, bottom_right, 0, 4)

        return np.minimum(c_im, ary)


    def find_components(edges, max_components=16):
        """Dilate the image until there are just a few connected components.

        Performs increasingly aggressive dilation (starting at 2 iterations and
        adding one per round) until cv2.findContours reports at most
        `max_components` contours, then returns those contours.
        """
        # A non-empty image can never drop below one contour, so a limit
        # smaller than 1 would otherwise loop forever.
        limit = max(max_components, 1)

        n = 1
        while True:
            n += 1
            dilated_image = np.uint8(dilate(edges, N=3, iterations=n))
            # findContours returns (image, contours, hierarchy) on OpenCV 3.x
            # but (contours, hierarchy) on 2.x/4.x; contours is always the
            # second-to-last element.
            contours = cv2.findContours(
                dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
            if len(contours) <= limit:
                return contours


    def find_optimal_components_subset(contours, edges):
        """Find a crop which strikes a good balance of coverage/compactness.

        Starts from the contour covering the most edge pixels and greedily
        unions in further contours while doing so is judged worthwhile.
        `contours` must be non-empty.

        Returns an (x1, y1, x2, y2) tuple.
        """
        c_info = props_for_contours(contours, edges)
        c_info.sort(key=lambda x: -x['sum'])
        total = np.sum(edges) / 255
        area = edges.shape[0] * edges.shape[1]

        # Seed the crop with the heaviest contour.
        c = c_info[0]
        del c_info[0]
        crop = c['x1'], c['y1'], c['x2'], c['y2']
        covered_sum = c['sum']

        while covered_sum < total:
            changed = False
            current_area = crop_area(crop)
            recall = 1.0 * covered_sum / total
            prec = 1 - 1.0 * current_area / area
            f1 = 2 * (prec * recall / (prec + recall))
            for i, c in enumerate(c_info):
                this_crop = c['x1'], c['y1'], c['x2'], c['y2']
                new_crop = union_crops(crop, this_crop)
                new_sum = covered_sum + c['sum']
                new_recall = 1.0 * new_sum / total
                new_prec = 1 - 1.0 * crop_area(new_crop) / area
                new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall)

                # Add this crop if it improves f1 score,
                # _or_ it adds 25% of the remaining pixels for <15% crop expansion.
                # ^^^ very ad-hoc! make this smoother
                remaining_frac = c['sum'] / (total - covered_sum)
                if current_area > 0:
                    new_area_frac = 1.0 * crop_area(new_crop) / current_area - 1
                else:
                    # A 1px-wide or 1px-tall crop has zero area; treat any
                    # growth as unbounded instead of dividing by zero, so only
                    # the f1 test can accept the candidate.
                    new_area_frac = float('inf')
                if new_f1 > f1 or (
                        remaining_frac > 0.25 and new_area_frac < 0.15):
                    print('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
                        i, covered_sum, new_sum, total, remaining_frac,
                        current_area, crop_area(new_crop), area, new_area_frac,
                        f1, new_f1))
                    crop = new_crop
                    covered_sum = new_sum
                    del c_info[i]
                    changed = True
                    # Restart the scan: the baseline scores are now stale.
                    break

            if not changed:
                break

        return crop


    def pad_crop(crop, contours, edges, border_contour, pad_px=15):
        """Slightly expand the crop to get full contours.

        The crop is first grown by pad_px on every side, then repeatedly
        enlarged to include any contour it only partially intersects. It never
        extends past the border: the image bounds, or 5px inside
        border_contour's bounding box when a border contour is given.

        Returns an (x1, y1, x2, y2) tuple.
        """
        # edges.shape is (rows, cols): x is limited by the width, y by the height.
        height, width = edges.shape[0], edges.shape[1]
        bx1, by1, bx2, by2 = 0, 0, width, height
        if border_contour is not None and len(border_contour) > 0:
            c = props_for_contours([border_contour], edges)[0]
            bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5

        def crop_in_border(rect, pad=0):
            """Grow rect by pad on each side and clamp it to the border."""
            x1, y1, x2, y2 = rect
            return (max(x1 - pad, bx1), max(y1 - pad, by1),
                    min(x2 + pad, bx2), min(y2 + pad, by2))

        # Pad exactly once; padding again on every pass would keep inflating
        # the crop for as long as any contour still straddles its edge.
        crop = crop_in_border(crop, pad_px)

        c_info = props_for_contours(contours, edges)
        changed = True
        while changed:
            changed = False
            for c in c_info:
                this_crop = c['x1'], c['y1'], c['x2'], c['y2']
                this_area = crop_area(this_crop)
                int_area = crop_area(intersect_crops(crop, this_crop))
                new_crop = crop_in_border(union_crops(crop, this_crop))
                # Only partially covered contours pull the crop outwards.
                if 0 < int_area < this_area and crop != new_crop:
                    print('%s -> %s' % (str(crop), str(new_crop)))
                    changed = True
                    crop = new_crop

        return crop


    def downscale_image(im, max_dim=2048):
        """Shrink im until its longest dimension is <= max_dim.

        Returns (scale, new_image), where scale <= 1 is the factor applied to
        both dimensions. An image that already fits is returned unchanged with
        a scale of 1.0.
        """
        a, b = im.size
        if max(a, b) <= max_dim:
            return 1.0, im

        scale = 1.0 * max_dim / max(a, b)
        # Keep each side at least 1px so extreme aspect ratios stay resizable.
        new_size = (max(1, int(a * scale)), max(1, int(b * scale)))
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
        new_im = im.resize(new_size, Image.LANCZOS)
        return scale, new_im


    def process_image(path, out_path):
        """Crop the image at path to its text region and save it to out_path.

        Edges are detected on a downscaled copy of the image, a page border
        (if one is found) is masked out, thin lines are filtered away, and the
        remaining edge pixels are grouped into components from which the crop
        is chosen. The crop is scaled back up and applied to the original
        image. Prints a message and writes nothing when no components are
        found.
        """
        # The context manager makes sure the source file handle gets closed.
        with Image.open(path) as orig_im:
            scale, im = downscale_image(orig_im)

            edges = cv2.Canny(np.asarray(im), 100, 200)

            # TODO: dilate image _before_ finding a border. This is crazy sensitive!
            # [-2] picks the contour list on OpenCV 2.x, 3.x and 4.x alike (the
            # return tuple has a leading image element only on 3.x).
            contours = cv2.findContours(
                edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
            borders = find_border_components(contours, edges)

            border_contour = None
            if borders:
                # Each entry is (index, x1, y1, x2, y2); use the smallest box.
                smallest = min(
                    borders, key=lambda b: (b[3] - b[1]) * (b[4] - b[2]))
                border_contour = contours[smallest[0]]
                edges = remove_border(border_contour, edges)

            edges = 255 * (edges > 0).astype(np.uint8)

            # Remove ~1px borders using a rank filter.
            maxed_rows = rank_filter(edges, -4, size=(1, 20))
            maxed_cols = rank_filter(edges, -4, size=(20, 1))
            edges = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)

            contours = find_components(edges)
            if len(contours) == 0:
                print('%s -> (no text!)' % path)
                return

            crop = find_optimal_components_subset(contours, edges)
            crop = pad_crop(crop, contours, edges, border_contour)

            crop = [int(x / scale) for x in crop]  # upscale to the original image size.

            text_im = orig_im.crop(crop)
            text_im.save(out_path)
        print('%s -> %s' % (path, out_path))


    if __name__ == '__main__':
        # A single argument containing '*' is expanded here (for shells that do
        # not glob) and processed in random order; otherwise every argument is
        # taken as an image path.
        if len(sys.argv) == 2 and '*' in sys.argv[1]:
            files = glob.glob(sys.argv[1])
            random.shuffle(files)
        else:
            files = sys.argv[1:]

        for path in files:
            # Swap only the final extension, whatever it is. A plain
            # str.replace('.png', ...) left .jpg inputs unchanged, so out_path
            # equalled path and the image was silently skipped below.
            out_path = os.path.splitext(path)[0] + '.crop.png'
            if os.path.exists(out_path):
                continue
            try:
                process_image(path, out_path)
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print('%s %s' % (path, e))