logger1 / ocr_engine.py
Sanjayraju30's picture
Update ocr_engine.py
2f21283 verified
raw
history blame
947 Bytes
import cv2
import pytesseract
import numpy as np
from PIL import Image
def preprocess_image(pil_image):
    """Binarize a PIL image so it can be handed to the OCR step.

    The image is converted to RGB, reduced to grayscale, smoothed with a
    3x3 Gaussian blur, and thresholded to 0/255 with Otsu's method.

    Args:
        pil_image: Object exposing ``convert("RGB")`` (a PIL image).

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    rgb_pixels = np.array(pil_image.convert("RGB"))
    # Blur before thresholding; the kernel is 3x3 and sigma 0 lets OpenCV
    # derive it from the kernel size.
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY), (3, 3), 0
    )
    otsu_binary = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns (computed_threshold, image); only the image is kept.
    return cv2.threshold(smoothed, 0, 255, otsu_binary)[1]
def extract_weight(pil_image):
    """Read a numeric weight from an image and format it with a unit.

    The image is preprocessed with ``preprocess_image`` and passed to
    Tesseract with a digits-and-dot whitelist. Every character of the OCR
    output other than ``0-9`` and ``.`` is discarded, and the remainder is
    parsed as a float.

    Args:
        pil_image: PIL image containing the weight reading.

    Returns:
        A string, never an exception:
        * ``"<value> kg"`` or ``"<value> grams"`` on success,
        * ``"No valid weight detected"`` when the OCR output holds no
          parseable number (empty, a lone ``"."``, several dots, ...),
        * ``"Error: <message>"`` when preprocessing or OCR itself fails.
    """
    try:
        processed_img = preprocess_image(pil_image)
        # --psm 7 selects Tesseract's single-text-line page segmentation mode;
        # the whitelist limits recognition to digits and the decimal point.
        config = "--psm 7 -c tessedit_char_whitelist=0123456789."
        text = pytesseract.image_to_string(processed_img, config=config)
        print("OCR Raw:", text)

        # Drop whitespace/newlines and anything else that slipped through.
        numbers = "".join(ch for ch in text if ch in "0123456789.")

        # Malformed digit strings ("", ".", "1.2.3") are an OCR miss, not an
        # internal failure: report them as "no weight" rather than leaking the
        # float() conversion message through the generic error path.
        try:
            weight_val = float(numbers)
        except ValueError:
            return "No valid weight detected"

        # NOTE(review): values >= 20 are labelled kg and smaller ones grams;
        # this heuristic is kept as-is -- confirm it matches the scales used.
        unit = "kg" if weight_val >= 20 else "grams"
        return f"{weight_val} {unit}"
    except Exception as e:
        # Boundary handler: callers receive a string result in every case.
        return f"Error: {str(e)}"