@@ -0,0 +1,320 @@
#!/usr/bin/env python
'''Crop an image to just the portions containing text.
Usage:
./crop_morphology.py path/to/image.jpg
This will place the cropped image in path/to/image.crop.png.
For details on the methodology, see
http://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html
Script created by Dan Vanderkam (https://github.com/danvk)
Adapted to Python 3 by Lui Pillmann (https://github.com/luipillmann)
'''
#import ipdb
import glob
import os
import random
import sys
import random
import math
import json
from collections import defaultdict
import cv2
from PIL import Image , ImageDraw
import numpy as np
from scipy .ndimage .filters import rank_filter
def dilate (ary , N , iterations ):
"""Dilate using an NxN '+' sign shape. ary is np.uint8."""
kernel = np .zeros ((N ,N ), dtype = np .uint8 )
#ipdb.set_trace()
kernel [(N - 1 )// 2 ,:] = 1 # Bug solved with // (integer division)
dilated_image = cv2 .dilate (ary / 255 , kernel , iterations = iterations )
kernel = np .zeros ((N ,N ), dtype = np .uint8 )
kernel [:,(N - 1 )// 2 ] = 1 # Bug solved with // (integer division)
dilated_image = cv2 .dilate (dilated_image , kernel , iterations = iterations )
return dilated_image
def props_for_contours (contours , ary ):
"""Calculate bounding box & the number of set pixels for each contour."""
c_info = []
for c in contours :
x ,y ,w ,h = cv2 .boundingRect (c )
c_im = np .zeros (ary .shape )
cv2 .drawContours (c_im , [c ], 0 , 255 , - 1 )
c_info .append ({
'x1' : x ,
'y1' : y ,
'x2' : x + w - 1 ,
'y2' : y + h - 1 ,
'sum' : np .sum (ary * (c_im > 0 ))/ 255
})
return c_info
def union_crops (crop1 , crop2 ):
"""Union two (x1, y1, x2, y2) rects."""
x11 , y11 , x21 , y21 = crop1
x12 , y12 , x22 , y22 = crop2
return min (x11 , x12 ), min (y11 , y12 ), max (x21 , x22 ), max (y21 , y22 )
def intersect_crops (crop1 , crop2 ):
x11 , y11 , x21 , y21 = crop1
x12 , y12 , x22 , y22 = crop2
return max (x11 , x12 ), max (y11 , y12 ), min (x21 , x22 ), min (y21 , y22 )
def crop_area (crop ):
x1 , y1 , x2 , y2 = crop
return max (0 , x2 - x1 ) * max (0 , y2 - y1 )
def find_border_components (contours , ary ):
borders = []
area = ary .shape [0 ] * ary .shape [1 ]
for i , c in enumerate (contours ):
x ,y ,w ,h = cv2 .boundingRect (c )
if w * h > 0.5 * area :
borders .append ((i , x , y , x + w - 1 , y + h - 1 ))
return borders
def angle_from_right (deg ):
return min (deg % 90 , 90 - (deg % 90 ))
def remove_border (contour , ary ):
"""Remove everything outside a border contour."""
# Use a rotated rectangle (should be a good approximation of a border).
# If it's far from a right angle, it's probably two sides of a border and
# we should use the bounding box instead.
c_im = np .zeros (ary .shape )
r = cv2 .minAreaRect (contour )
degs = r [2 ]
if angle_from_right (degs ) <= 10.0 :
box = cv2 .boxPoints (r )
box = np .int0 (box )
cv2 .drawContours (c_im , [box ], 0 , 255 , - 1 )
cv2 .drawContours (c_im , [box ], 0 , 0 , 4 )
else :
x1 , y1 , x2 , y2 = cv2 .boundingRect (contour )
cv2 .rectangle (c_im , (x1 , y1 ), (x2 , y2 ), 255 , - 1 )
cv2 .rectangle (c_im , (x1 , y1 ), (x2 , y2 ), 0 , 4 )
return np .minimum (c_im , ary )
def find_components (edges , max_components = 16 ):
"""Dilate the image until there are just a few connected components.
Returns contours for these components."""
# Perform increasingly aggressive dilation until there are just a few
# connected components.
count = 21
dilation = 5
n = 1
while count > 16 :
n += 1
dilated_image = dilate (edges , N = 3 , iterations = n )
dilated_image = np .uint8 (dilated_image )
#ipdb.set_trace()
#_, contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
_ , contours , hierarchy = cv2 .findContours (dilated_image , cv2 .RETR_TREE , cv2 .CHAIN_APPROX_SIMPLE )
count = len (contours )
#print dilation
#Image.fromarray(edges).show()
#Image.fromarray(255 * dilated_image).show()
return contours
def find_optimal_components_subset (contours , edges ):
"""Find a crop which strikes a good balance of coverage/compactness.
Returns an (x1, y1, x2, y2) tuple.
"""
c_info = props_for_contours (contours , edges )
c_info .sort (key = lambda x : - x ['sum' ])
total = np .sum (edges ) / 255
area = edges .shape [0 ] * edges .shape [1 ]
c = c_info [0 ]
del c_info [0 ]
this_crop = c ['x1' ], c ['y1' ], c ['x2' ], c ['y2' ]
crop = this_crop
covered_sum = c ['sum' ]
while covered_sum < total :
changed = False
recall = 1.0 * covered_sum / total
prec = 1 - 1.0 * crop_area (crop ) / area
f1 = 2 * (prec * recall / (prec + recall ))
#print '----'
for i , c in enumerate (c_info ):
this_crop = c ['x1' ], c ['y1' ], c ['x2' ], c ['y2' ]
new_crop = union_crops (crop , this_crop )
new_sum = covered_sum + c ['sum' ]
new_recall = 1.0 * new_sum / total
new_prec = 1 - 1.0 * crop_area (new_crop ) / area
new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall )
# Add this crop if it improves f1 score,
# _or_ it adds 25% of the remaining pixels for <15% crop expansion.
# ^^^ very ad-hoc! make this smoother
remaining_frac = c ['sum' ] / (total - covered_sum )
new_area_frac = 1.0 * crop_area (new_crop ) / crop_area (crop ) - 1
if new_f1 > f1 or (
remaining_frac > 0.25 and new_area_frac < 0.15 ):
print ('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
i , covered_sum , new_sum , total , remaining_frac ,
crop_area (crop ), crop_area (new_crop ), area , new_area_frac ,
f1 , new_f1 ))
crop = new_crop
covered_sum = new_sum
del c_info [i ]
changed = True
break
if not changed :
break
return crop
def pad_crop (crop , contours , edges , border_contour , pad_px = 15 ):
"""Slightly expand the crop to get full contours.
This will expand to include any contours it currently intersects, but will
not expand past a border.
"""
bx1 , by1 , bx2 , by2 = 0 , 0 , edges .shape [0 ], edges .shape [1 ]
if border_contour is not None and len (border_contour ) > 0 :
c = props_for_contours ([border_contour ], edges )[0 ]
bx1 , by1 , bx2 , by2 = c ['x1' ] + 5 , c ['y1' ] + 5 , c ['x2' ] - 5 , c ['y2' ] - 5
def crop_in_border (crop ):
x1 , y1 , x2 , y2 = crop
x1 = max (x1 - pad_px , bx1 )
y1 = max (y1 - pad_px , by1 )
x2 = min (x2 + pad_px , bx2 )
y2 = min (y2 + pad_px , by2 )
return crop
crop = crop_in_border (crop )
c_info = props_for_contours (contours , edges )
changed = False
for c in c_info :
this_crop = c ['x1' ], c ['y1' ], c ['x2' ], c ['y2' ]
this_area = crop_area (this_crop )
int_area = crop_area (intersect_crops (crop , this_crop ))
new_crop = crop_in_border (union_crops (crop , this_crop ))
if 0 < int_area < this_area and crop != new_crop :
print ('%s -> %s' % (str (crop ), str (new_crop )))
changed = True
crop = new_crop
if changed :
return pad_crop (crop , contours , edges , border_contour , pad_px )
else :
return crop
def downscale_image (im , max_dim = 2048 ):
"""Shrink im until its longest dimension is <= max_dim.
Returns new_image, scale (where scale <= 1).
"""
a , b = im .size
if max (a , b ) <= max_dim :
return 1.0 , im
scale = 1.0 * max_dim / max (a , b )
new_im = im .resize ((int (a * scale ), int (b * scale )), Image .ANTIALIAS )
return scale , new_im
def process_image (path , out_path ):
#ipdb.set_trace() #reference breakpoint
orig_im = Image .open (path )
scale , im = downscale_image (orig_im )
edges = cv2 .Canny (np .asarray (im ), 100 , 200 )
# TODO: dilate image _before_ finding a border. This is crazy sensitive!
_ , contours , hierarchy = cv2 .findContours (edges , cv2 .RETR_TREE , cv2 .CHAIN_APPROX_SIMPLE )
borders = find_border_components (contours , edges )
borders .sort (key = lambda i_x1_y1_x2_y2 : (i_x1_y1_x2_y2 [3 ] - i_x1_y1_x2_y2 [1 ]) * (i_x1_y1_x2_y2 [4 ] - i_x1_y1_x2_y2 [2 ]))
border_contour = None
if len (borders ):
border_contour = contours [borders [0 ][0 ]]
edges = remove_border (border_contour , edges )
edges = 255 * (edges > 0 ).astype (np .uint8 )
# Remove ~1px borders using a rank filter.
maxed_rows = rank_filter (edges , - 4 , size = (1 , 20 ))
maxed_cols = rank_filter (edges , - 4 , size = (20 , 1 ))
debordered = np .minimum (np .minimum (edges , maxed_rows ), maxed_cols )
edges = debordered
contours = find_components (edges )
if len (contours ) == 0 :
print ('%s -> (no text!)' % path )
return
crop = find_optimal_components_subset (contours , edges )
crop = pad_crop (crop , contours , edges , border_contour )
#ipdb.set_trace()
crop = [int (x / scale ) for x in crop ] # upscale to the original image size.
#ipdb.set_trace()
#draw = ImageDraw.Draw(im)
#c_info = props_for_contours(contours, edges)
#for c in c_info:
# this_crop = c['x1'], c['y1'], c['x2'], c['y2']
# draw.rectangle(this_crop, outline='blue')
#draw.rectangle(crop, outline='red')
#im.save(out_path)
#draw.text((50, 50), path, fill='red')
#orig_im.save(out_path)
#im.show()
text_im = orig_im .crop (crop )
text_im .save (out_path )
print ('%s -> %s' % (path , out_path ))
if __name__ == '__main__' :
if len (sys .argv ) == 2 and '*' in sys .argv [1 ]:
files = glob .glob (sys .argv [1 ])
random .shuffle (files )
else :
files = sys .argv [1 :]
for path in files :
#out_path = path.replace('.jpg', '.crop.png')
out_path = path .replace ('.png' , '.crop.png' )
if os .path .exists (out_path ): continue
try :
process_image (path , out_path )
except Exception as e :
print ('%s %s' % (path , e ))