Skip to content

Instantly share code, notes, and snippets.

@mstatt
Created April 5, 2023 22:11
Show Gist options
  • Select an option

  • Save mstatt/709a291b90bfdf4e461318cfc003bf2d to your computer and use it in GitHub Desktop.

Select an option

Save mstatt/709a291b90bfdf4e461318cfc003bf2d to your computer and use it in GitHub Desktop.
# USAGE
# python basic-ocr-with-spellcheck.py --image <imagename>
# import the necessary packages
from textblob import TextBlob
import pytesseract
import argparse
import cv2
def parse_args(argv=None):
    """Parse the command-line arguments into a plain dict.

    Args:
        argv: List of argument strings to parse. ``None`` lets argparse
            fall back to the process's own command line.

    Returns:
        A dict with the single key ``"image"``: the path given via
        ``-i`` / ``--image``.
    """
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True,
                    help="path to input image to be OCR'd")
    return vars(ap.parse_args(argv))


def ensure_image_loaded(image, path):
    """Return ``image`` unchanged, or exit cleanly if loading failed.

    ``cv2.imread`` reports a missing or unreadable file by returning
    ``None`` instead of raising, which would otherwise surface later as an
    opaque error inside ``cv2.cvtColor``.

    Args:
        image: The value returned by ``cv2.imread``.
        path: The path that was passed to ``cv2.imread`` (used only for the
            error message).

    Returns:
        ``image`` itself when it is not ``None``.

    Raises:
        SystemExit: If ``image`` is ``None``; the message names ``path``.
    """
    if image is None:
        raise SystemExit(
            "error: could not read image file: {!r}".format(path))
    return image


def main(argv=None):
    """OCR an image and print the text before and after spell correction.

    Args:
        argv: Optional list of argument strings; ``None`` uses the process's
            command line.

    Raises:
        SystemExit: If the arguments are invalid or the image cannot be read.
    """
    args = parse_args(argv)

    # load the input image (failing fast if it cannot be read) and convert
    # it from BGR to RGB channel ordering
    image = ensure_image_loaded(cv2.imread(args["image"]), args["image"])
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # use Tesseract to OCR the image
    text = pytesseract.image_to_string(rgb)

    # show the text *before* ocr-spellchecking has been applied
    print("BEFORE SPELLCHECK")
    print("=================")
    print(text)
    print("\n")

    # apply spell checking to the OCR'd text
    tb = TextBlob(text)
    corrected = tb.correct()

    # show the text after ocr-spellchecking has been applied
    print("AFTER SPELLCHECK")
    print("================")
    print(corrected)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment