import re

import cv2
import numpy as np
import pytesseract
from PIL import Image

# First complete decimal number in the OCR output: one or more digits with an
# optional fractional part (e.g. "52.35", "002.50", "5.25", "7").
_WEIGHT_PATTERN = re.compile(r"\d+(?:\.\d+)?")

# Tesseract options: page-segmentation mode 7 plus a whitelist restricting
# recognised characters to digits and the decimal point.
_TESSERACT_CONFIG = "--psm 7 -c tessedit_char_whitelist=0123456789."

# NOTE(review): this is a fixed placeholder, not a confidence reported by
# Tesseract. Callers only learn "matched" (95.0) versus "not matched" (0.0).
_MATCH_CONFIDENCE = 95.0


def _preprocess_for_ocr(pil_img):
    """Return an upscaled, blurred, inverse-thresholded image for OCR."""
    # Grayscale ("L"), palette ("P") and 1-bit PIL images become 2-D arrays,
    # which cv2.COLOR_RGB2GRAY rejects; normalise PIL inputs to RGB first.
    # Anything that is not a PIL image (e.g. an RGB ndarray) is passed through.
    if isinstance(pil_img, Image.Image):
        pil_img = pil_img.convert("RGB")

    img = np.array(pil_img)
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Upscale 2x before thresholding to improve OCR accuracy.
    resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    blurred = cv2.GaussianBlur(resized, (5, 5), 0)

    return cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )


def _parse_weight(ocr_text):
    """Return the first decimal number found in *ocr_text*, or ``None``."""
    match = _WEIGHT_PATTERN.search(ocr_text)
    return match.group() if match else None


def extract_weight_from_image(pil_img):
    """Read a numeric weight value from an image using Tesseract OCR.

    The image is converted to grayscale, upscaled, blurred and adaptively
    thresholded, then passed to Tesseract with a digits-and-dot whitelist.
    The first decimal number in the recognised text is returned.

    Args:
        pil_img: A PIL image in any mode. An RGB numpy array is also accepted.

    Returns:
        A ``(text, confidence)`` tuple:

        * ``(weight_string, 95.0)`` when a number was found. The weight is
          returned exactly as recognised (leading zeros are preserved).
        * ``("No weight detected", 0.0)`` when OCR produced no number.
        * ``("Error: <message>", 0.0)`` when any step raised an exception.

        This function never raises; failures are reported through the
        returned string.
    """
    try:
        thresh = _preprocess_for_ocr(pil_img)
        ocr_text = pytesseract.image_to_string(thresh, config=_TESSERACT_CONFIG)
        print("OCR Text:", ocr_text)

        weight = _parse_weight(ocr_text)
        if weight is None:
            return "No weight detected", 0.0
        return weight, _MATCH_CONFIDENCE
    except Exception as e:
        # Deliberate best-effort boundary: callers expect a tuple, not a raise.
        return f"Error: {e}", 0.0