import argparse

import cv2
import numpy
import numpy as np
# NOTE: the wildcard import is what brings the `pytesseract` and `Output`
# names used by OCR() into scope; no other import in this file provides them.
from pytesseract import *
from PIL import Image, ImageFont, ImageDraw


def OCR(img: np.ndarray, lang: str = 'hin', min_conf: float = 0.25) -> str:
    """Run Tesseract OCR on an image and return the recognised words.

    The image is converted from BGR to RGB channel order and handed to
    ``pytesseract.image_to_data``. Every result entry whose confidence is
    strictly greater than ``min_conf`` contributes its whitespace-stripped
    text to the output.

    Args:
        img: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before recognition).
        lang: Language code passed through to Tesseract. Defaults to
            ``'hin'``.
        min_conf: Confidence threshold; an entry is kept only when its
            confidence is strictly greater than this value.
            NOTE(review): the threshold is compared against the raw ``conf``
            values returned by Tesseract, which are presumably on a 0-100
            scale (with -1 for non-text boxes) -- confirm whether the 0.25
            default was meant to be 25.

    Returns:
        The accepted words in result order, each followed by a single space
        (so a non-empty result ends with a trailing space). Returns ``""``
        when nothing passes the threshold.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(
        rgb, output_type=Output.DICT, lang=lang
    )

    words = []
    for raw_text, raw_conf in zip(results["text"], results["conf"]):
        # Confidence may arrive as an int, a float, or a float-formatted
        # string such as "96.53" depending on the Tesseract / pytesseract
        # versions in use; int() raises ValueError on the latter, float()
        # handles all three.
        if float(raw_conf) > min_conf:
            words.append(raw_text.strip())

    # Each word is terminated (not merely separated) by a space, matching
    # the historical output format callers may rely on.
    return "".join(word + " " for word in words)