Spaces:
Runtime error
Runtime error
File size: 1,384 Bytes
1b870f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import cv2
import numpy
import argparse
from pytesseract import*
from PIL import Image, ImageFont, ImageDraw
import numpy as np
# def preprocess_image(image):
def OCR(img, lang='hin', min_conf=0.25):
    """Run Tesseract on an image and return the recognised words as one string.

    Args:
        img: Image array handed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB``
            (presumably a BGR image as loaded by OpenCV -- confirm with
            callers).
        lang: Tesseract language code forwarded to ``image_to_data``.
        min_conf: A word is kept only when its integer confidence is strictly
            greater than this value.
            NOTE(review): Tesseract's TSV confidence appears to be on a
            0-100 scale (with -1 for non-word rows), so the 0.25 default
            effectively keeps every word with confidence >= 1 -- confirm
            that this is the intended threshold.

    Returns:
        The kept words, each stripped of surrounding whitespace and followed
        by a single space, concatenated in the order Tesseract reported them.
        An empty string when nothing passes the confidence filter.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(
        rgb, output_type=Output.DICT, lang=lang
    )

    # An empty dict (no rows parsed) should yield "" rather than a KeyError.
    texts = results.get("text", [])
    confs = results.get("conf", [])

    pieces = []
    for text, raw_conf in zip(texts, confs):
        # Depending on the pytesseract version the confidence may arrive as
        # an int, a float, or a float-formatted string such as "96.43";
        # int() alone rejects the latter, so go through float() first.
        conf = int(float(raw_conf))
        if conf > min_conf:
            pieces.append(text.strip() + " ")
    return "".join(pieces)