-
-
Save MrAch26/5e2aa7e73b508f8ba9133d468efa4348 to your computer and use it in GitHub Desktop.
| from PIL import Image | |
| from scipy.ndimage import gaussian_filter | |
| import numpy | |
| import pytesseract | |
| from PIL import ImageFilter | |
def solve_captcha(filename, th1=140, th2=140, sig=1.5, whitelist="0123456789"):
    """Clean up a captcha image and read it with Tesseract.

    The image is converted to grayscale, thresholded, blurred with a
    Gaussian filter, thresholded again, edge-enhanced and sharpened, and
    the result is handed to ``pytesseract``.

    Every intermediate stage is written to the current working directory
    (``original.png``, ``black_and_white.png``, ``first_threshold.png``,
    ``blurred.png``, ``final.png``) so the pipeline can be inspected.

    Args:
        filename: Path (or file object) of the captcha image to read.
        th1: Pixel threshold applied to the grayscale image; values
            strictly greater become 255, everything else 0.
        th2: Pixel threshold applied again after blurring.
        sig: Sigma of the Gaussian blur.
        whitelist: Characters Tesseract is allowed to return. Pass an
            empty string or ``None`` to remove the restriction.

    Returns:
        The recognized text with surrounding whitespace stripped.
    """
    # The context manager releases the input file handle as soon as the
    # pixel data has been copied into the grayscale image.
    with Image.open(filename) as original:
        original.save("original.png")
        black_and_white = original.convert("L")  # 8-bit grayscale
    black_and_white.save("black_and_white.png")

    # First pass: hard threshold to a two-level image.
    first_threshold = black_and_white.point(lambda p: 255 if p > th1 else 0)
    first_threshold.save("first_threshold.png")

    # Blur the two-level image, then threshold again (second pass below).
    blurred = Image.fromarray(
        gaussian_filter(numpy.array(first_threshold), sigma=sig)
    )
    blurred.save("blurred.png")

    final = blurred.point(lambda p: 255 if p > th2 else 0)
    final = final.filter(ImageFilter.EDGE_ENHANCE_MORE)
    final = final.filter(ImageFilter.SHARPEN)
    final.save("final.png")

    # NOTE(review): Tesseract documents --psm 10 as "treat the image as a
    # single character"; kept as in the original, but confirm it is the
    # intended mode for multi-character captchas.
    config = "--psm 10 --oem 3"
    if whitelist:
        config += " -c tessedit_char_whitelist=" + whitelist

    # Hand over the in-memory image instead of re-opening final.png.
    number = pytesseract.image_to_string(final, lang="eng", config=config).strip()

    print("RESULT OF CAPTCHA:")
    print(number)
    print("===================")
    return number
@arunpurshotam
That’s because you have a setting on line 29 that specifically says: tessedit_char_whitelist=0123456789. You should change or remove that.
@AbdulMobinFata
Change line 29 to this :
result = pytesseract.image_to_string(Image.open('final.png'), lang='eng', config='--psm 10 --oem 3')
How do I use my own captcha to test this?
@MikeyD-rbg
Can you send more examples of your captcha and with better resolution ?
This snippet is meant for numbers, as mentioned on line 28: tessedit_char_whitelist=0123456789
@Manedi send more examples
will try
@MikeyD-rbg Can you send more examples of your captcha and with better resolution ?
This snippet is meant for numbers, as mentioned on line 28:
tessedit_char_whitelist=0123456789
I've sent you 10 more examples. unfortunately all the captchas are from a site and all this size.
How many more examples would you like?
I've managed to solve one of the captchas, but the z is not capitalized. How do I ensure all the letters are capitals?
Added this line but still lowercase z;
config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
I have got good results with this script. The image preprocessing works really good and is the key to improve the code. Thanks @MrAch26!
@MrAch26 I've now downloaded over 200k captchas 😂 unfortunately all the same resolution, how many should I upload?

















Didn't work for attached image
