Skip to content

Instantly share code, notes, and snippets.

@ultrasounder
Forked from RhetTbull/vision.py
Created November 22, 2023 16:58
Show Gist options
  • Select an option

  • Save ultrasounder/13bfd030a0aa9c9b15e33e0a8fbf4119 to your computer and use it in GitHub Desktop.

Select an option

Save ultrasounder/13bfd030a0aa9c9b15e33e0a8fbf4119 to your computer and use it in GitHub Desktop.

Revisions

  1. @RhetTbull RhetTbull revised this gist Sep 18, 2022. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions vision.py
    Original file line number Diff line number Diff line change
    @@ -29,8 +29,6 @@ def image_to_text(img_path, lang="eng"):
    handler = make_request_handler(results)
    vision_request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(handler)
    error = vision_handler.performRequests_error_([vision_request], None)
    vision_request.dealloc()
    vision_handler.dealloc()

    return results

  2. @RhetTbull RhetTbull created this gist Nov 11, 2020.
    66 changes: 66 additions & 0 deletions vision.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    """ Use Apple's Vision Framework via PyObjC to detect text in images """

    import pathlib

    import Quartz
    import Vision
    from Cocoa import NSURL
    from Foundation import NSDictionary
    # needed to capture system-level stderr
    from wurlitzer import pipes


    def image_to_text(img_path, lang="eng"):
        """Recognize text in an image using Apple's Vision framework.

        Args:
            img_path: filesystem path of the image, as a string.
            lang: currently unused; kept so existing callers that pass it
                keep working.

        Returns:
            A list with one ``[text, confidence]`` entry per text observation,
            as collected by the handler from ``make_request_handler``.
        """
        input_url = NSURL.fileURLWithPath_(img_path)

        with pipes() as (out, err):
            # capture stdout and stderr from system calls
            # otherwise, Quartz.CIImage.imageWithContentsOfURL_
            # prints to stderr something like:
            # 2020-09-20 20:55:25.538 python[73042:5650492] Creating client/daemon connection: B8FE995E-3F27-47F4-9FA8-559C615FD774
            # 2020-09-20 20:55:25.652 python[73042:5650492] Got the query meta data reply for: com.apple.MobileAsset.RawCamera.Camera, response: 0
            input_image = Quartz.CIImage.imageWithContentsOfURL_(input_url)

        vision_options = NSDictionary.dictionaryWithDictionary_({})
        vision_handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(
            input_image, vision_options
        )
        results = []
        handler = make_request_handler(results)
        vision_request = Vision.VNRecognizeTextRequest.alloc().initWithCompletionHandler_(
            handler
        )
        # The completion handler fills `results` (and reports any request
        # error) while the request is performed, so the return value of this
        # call is not needed here.
        vision_handler.performRequests_error_([vision_request], None)

        # NOTE: do not call dealloc() on vision_request / vision_handler.
        # PyObjC manages the lifetime of Objective-C objects; releasing them
        # by hand leaves the Python proxies pointing at freed memory.
        return results

    def make_request_handler(results):
        """Build a completion handler that appends recognized text to *results*.

        Args:
            results: list the returned handler appends ``[text, confidence]``
                entries to, one per text observation in the request.

        Returns:
            A ``handler(request, error)`` callable. When *error* is truthy it
            prints the error and leaves *results* untouched.

        Raises:
            ValueError: if *results* is not a list.
        """
        if not isinstance(results, list):
            raise ValueError("results must be a list")

        def handler(request, error):
            if error:
                print(f"Error! {error}")
                return

            # results() may be None when the request produced nothing;
            # treat that the same as an empty result set.
            for text_observation in request.results() or []:
                candidates = text_observation.topCandidates_(1)
                # An observation may offer no candidate at all; skip it
                # instead of failing on the [0] lookup.
                if not candidates:
                    continue
                recognized_text = candidates[0]
                results.append(
                    [recognized_text.string(), recognized_text.confidence()]
                )

        return handler


    def main():
        """Command-line entry point: print the text recognized in one image.

        Expects the image path as the first command-line argument. Exits with
        a message when the argument is missing or does not name a file.
        """
        import sys
        import pathlib

        # Guard against a missing argument so the user gets a usage hint
        # instead of an IndexError traceback.
        if len(sys.argv) < 2:
            sys.exit("Usage: vision.py IMAGE_PATH")

        img_path = pathlib.Path(sys.argv[1])
        if not img_path.is_file():
            sys.exit("Invalid image path")
        # image_to_text is handed an absolute path string
        img_path = str(img_path.resolve())
        results = image_to_text(img_path)
        print(results)


    # Run the command-line entry point only when this file is executed as a script.
    if __name__ == "__main__":
    main()