Spaces:
Runtime error
Runtime error
import cv2 | |
import numpy | |
import argparse | |
from pytesseract import* | |
from PIL import Image, ImageFont, ImageDraw | |
import numpy as np | |
# def preprocess_image(image): | |
def OCR(img, lang='hin', min_conf=0.25):
    """Run Tesseract on an image and return the recognised text as one string.

    Args:
        img: Image array handed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB``
            (presumably a BGR array as loaded by OpenCV -- confirm with
            callers).
        lang: Tesseract language code forwarded to ``image_to_data``.
        min_conf: An entry is kept only when its confidence, truncated to an
            integer, is strictly greater than this value.
            NOTE(review): Tesseract appears to report confidence on a 0-100
            scale (with -1 for non-text rows), so the default of 0.25
            effectively keeps every entry with confidence >= 1 -- confirm
            that this is the intended threshold.

    Returns:
        The kept text entries, each stripped of surrounding whitespace and
        followed by a single space, concatenated in the order Tesseract
        returned them. An empty string when nothing passes the filter.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang=lang)

    # Tolerate a result dict without these columns instead of raising
    # KeyError (presumably what comes back when no rows were produced).
    texts = results.get("text", [])
    confidences = results.get("conf", [])

    pieces = []
    for text, raw_conf in zip(texts, confidences):
        # The confidence may arrive as an int, a float, or a decimal string
        # such as "95.000000"; int() alone raises ValueError on the latter,
        # so parse through float() first and then truncate as before.
        conf = int(float(raw_conf))
        if conf > min_conf:
            pieces.append(text.strip() + " ")
    return "".join(pieces)