import cv2
import pytesseract
import numpy as np
from PIL import Image

# Characters that may appear in a numeric weight reading.
_WEIGHT_CHARS = frozenset("0123456789.")

# Tesseract options: page segmentation mode 7 and a digits-plus-dot whitelist.
_OCR_CONFIG = "--psm 7 -c tessedit_char_whitelist=0123456789."

# Returned when the OCR output cannot be read as a number.
_NO_WEIGHT_MESSAGE = "No valid weight detected"


def _parse_weight(text: str) -> str:
    """Reduce raw OCR text to a numeric weight string, or the fallback message."""
    # Drop everything except digits and dots (whitespace, newlines, stray glyphs).
    weight = "".join(ch for ch in text if ch in _WEIGHT_CHARS)

    # After filtering, any character that is not a dot is a digit.
    has_digit = any(ch != "." for ch in weight)

    # A reading made only of dots, or with several decimal points, is OCR
    # noise rather than a number, so report it as "not detected".
    if not has_digit or weight.count(".") > 1:
        return _NO_WEIGHT_MESSAGE
    return weight


def extract_weight(pil_image: Image.Image) -> str:
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to grayscale, upscaled 2x, blurred and binarised
    with Otsu's threshold before being passed to Tesseract, which is
    restricted to digits and the decimal point.

    Args:
        pil_image: The image to read, as a PIL image in any mode that can be
            converted to RGB.

    Returns:
        The recognised weight as a string of digits with at most one decimal
        point, or ``"No valid weight detected"`` when the OCR output contains
        no digits or more than one decimal point.
    """
    # Convert to the array format OpenCV works with.
    img = np.array(pil_image.convert("RGB"))
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Enhance for OCR: enlarge, smooth, then binarise. With THRESH_OTSU the
    # threshold is chosen automatically, so the 0 passed here is a placeholder.
    gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    text = pytesseract.image_to_string(thresh, config=_OCR_CONFIG)
    return _parse_weight(text)