import re

import cv2
import numpy as np
import pytesseract
from PIL import Image

# Characters kept from the OCR output; mirrors the Tesseract whitelist below.
_WEIGHT_CHARS = "0123456789."

# A well-formed unsigned decimal: "12", "12.5" or ".5".
_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")


def preprocess_image(pil_image):
    """Return a binarised grayscale version of *pil_image* for OCR.

    The image is converted to RGB, reduced to grayscale, smoothed with a
    3x3 Gaussian blur and then thresholded with Otsu's method.

    Args:
        pil_image: An object exposing ``convert("RGB")`` whose result can
            be turned into a NumPy array (a PIL image in practice).

    Returns:
        The thresholded image as returned by ``cv2.threshold``.
    """
    image = np.array(pil_image.convert("RGB"))
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    # The threshold value 0 is ignored: THRESH_OTSU picks it automatically.
    _, thresh = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return thresh


def extract_weight(pil_image):
    """Read a numeric weight from *pil_image* via OCR and format it.

    The image is preprocessed, passed to Tesseract restricted to digits
    and ``.``, and the recognised characters are parsed as a float.

    Args:
        pil_image: The image to read (see ``preprocess_image``).

    Returns:
        A string, always:
        * ``"<value> kg"`` or ``"<value> grams"`` on success,
        * ``"No valid weight detected"`` when no single well-formed number
          can be recovered from the OCR output,
        * ``"Error: <message>"`` when preprocessing or OCR raises.
    """
    try:
        processed_img = preprocess_image(pil_image)
        config = "--psm 7 -c tessedit_char_whitelist=0123456789."
        text = pytesseract.image_to_string(processed_img, config=config)
        print("OCR Raw:", text)

        # Drop whitespace/newlines and anything else outside the whitelist.
        numbers = "".join(ch for ch in text if ch in _WEIGHT_CHARS)
        if not numbers:
            return "No valid weight detected"

        try:
            weight_val = float(numbers)
        except ValueError:
            # OCR often adds stray dots ("12.5.", "."). Recover only when
            # exactly one well-formed number remains; otherwise the reading
            # is ambiguous and is reported as not detected instead of
            # surfacing a raw float() conversion error.
            candidates = _NUMBER_RE.findall(numbers)
            if len(candidates) != 1:
                return "No valid weight detected"
            weight_val = float(candidates[0])

        # NOTE(review): the unit is guessed from magnitude alone (>= 20 is
        # labelled kg, anything smaller grams) - confirm this matches the
        # scales being read.
        unit = "kg" if weight_val >= 20 else "grams"
        return f"{weight_val} {unit}"
    except Exception as e:
        # Deliberate catch-all boundary: callers receive a string in every
        # case, including OCR/engine failures.
        return f"Error: {str(e)}"