import re

import cv2
import numpy as np
import pytesseract
from PIL import Image

# Characters kept when cleaning a line of OCR output (same set as the
# Tesseract whitelist used below).
_NUMERIC_CHARS = "0123456789."

# One well-formed decimal number: "12", "12.5" or ".5".  A bare "." or a
# trailing dot ("12.") is never part of a match.
_WELL_FORMED_NUMBER = re.compile(r"\d+(?:\.\d+)?|\.\d+")

# Fixed scores reported to callers.  These are NOT derived from Tesseract's
# per-word confidences; they only signal "a number was found" vs. "nothing".
_CONFIDENCE_FOUND = 95
_CONFIDENCE_NONE = 0


def _parse_weight(raw_text):
    """Return the first well-formed number found in OCR text, or ""."""
    # Work line by line so that numbers printed on separate lines are not
    # glued together into one meaningless value (e.g. "12\n34" -> "1234").
    for line in raw_text.splitlines():
        # Drop stray characters/spaces inside the line; OCR sometimes splits
        # the digits of a single reading with whitespace.
        cleaned = "".join(ch for ch in line if ch in _NUMERIC_CHARS)
        match = _WELL_FORMED_NUMBER.search(cleaned)
        if match:
            return match.group(0)
    return ""


def extract_weight_from_image(pil_img: Image.Image) -> tuple:
    """Read a numeric weight value from an image using Tesseract OCR.

    The image is converted to grayscale, inverse-thresholded, upscaled 2x
    and passed to Tesseract restricted to digits and the decimal point.

    Args:
        pil_img: PIL image expected to contain the reading.

    Returns:
        A ``(weight, confidence)`` tuple.  ``weight`` is the first
        well-formed decimal number found in the OCR output as a string
        (``""`` when none is found).  ``confidence`` is 95 when a number
        was found and 0 otherwise; it is a fixed score, not a value
        computed by the OCR engine.

    Any exception raised during processing (including an invalid
    ``pil_img``) is logged to stdout and reported as ``("", 0)`` rather
    than propagated, so callers always receive a tuple.
    """
    try:
        # Convert PIL (RGB) to the BGR channel order OpenCV works with.
        rgb = np.array(pil_img.convert("RGB"))
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

        # Inverse binary threshold: pixels above 120 become 0, all others
        # become 255.
        _, binary = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV)

        # Upscale 2x before OCR.
        resized = cv2.resize(
            binary, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC
        )

        # Run OCR with a digit whitelist.
        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
        raw_text = pytesseract.image_to_string(resized, config=config)
        print("🔍 OCR Raw Output:", repr(raw_text))  # Show in Hugging Face logs

        weight = _parse_weight(raw_text)
        confidence = _CONFIDENCE_FOUND if weight else _CONFIDENCE_NONE
        return weight, confidence
    except Exception as e:
        # Deliberate best-effort boundary: report failure instead of raising.
        print("❌ OCR Error:", str(e))
        return "", _CONFIDENCE_NONE