Spaces:
Sleeping
Sleeping
import pytesseract | |
import numpy as np | |
import cv2 | |
import re | |
import logging | |
from PIL import Image | |
# Configure the root logger once at import time: INFO and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def preprocess_strong(img):
    """Prepare a blurry BGR image for OCR.

    The image is reduced to grayscale, enlarged to twice its width and
    height with bicubic interpolation, contrast-equalised with CLAHE and
    finally passed through a 3x3 sharpening filter.

    Args:
        img: Image array in BGR channel order, as accepted by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

    Returns:
        The single-channel, upscaled, contrast-enhanced and sharpened image.
    """
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Double both dimensions; cv2.resize expects the target as (width, height).
    rows, cols = mono.shape
    doubled_size = (cols * 2, rows * 2)
    upscaled = cv2.resize(mono, doubled_size, interpolation=cv2.INTER_CUBIC)

    # Local (tile-based) histogram equalisation to lift contrast.
    equaliser = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equaliser.apply(upscaled)

    # Sharpening kernel: centre weight 5, the four direct neighbours -1,
    # corners 0 (weights sum to 1, so overall brightness is kept).
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(contrasted, -1, sharpen_kernel)
# Anything that is not a digit or a decimal point is discarded before parsing.
_NON_NUMERIC_RE = re.compile(r"[^\d.]")

# Tesseract configurations, tried in order. Both restrict output to digits and
# '.'; they differ only in page segmentation mode (--psm 6, then --psm 11).
_OCR_CONFIGS = (
    r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',
    r'--oem 3 --psm 11 -c tessedit_char_whitelist=0123456789.',
)


def _parse_weight_text(raw_text):
    """Turn raw OCR text into a weight string, or None if it is not plausible."""
    # Removing every non-digit/non-dot character also removes all whitespace
    # and newlines, so no separate strip/replace pass is needed.
    cleaned = _NON_NUMERIC_RE.sub("", raw_text)

    # Keep only the last decimal point when several were recognised.
    surplus_dots = cleaned.count(".") - 1
    if surplus_dots > 0:
        cleaned = cleaned.replace(".", "", surplus_dots)

    if cleaned.startswith("."):
        cleaned = "0" + cleaned
    if not cleaned:
        return None

    # At this point `cleaned` is digits with at most one '.', and never a
    # bare '.', so float() cannot fail on it.
    value = float(cleaned)
    if 0.001 <= value <= 5000:
        # NOTE: rounding to 2 decimals means accepted values below 0.005 are
        # reported as "0.0".
        return str(round(value, 2))
    return None


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image using several Tesseract passes.

    The image is preprocessed with ``preprocess_strong`` and then OCR'd once
    per configuration in ``_OCR_CONFIGS``; the first pass whose text parses to
    a number within [0.001, 5000] wins.

    Args:
        pil_img: The image to read. PIL-style images (objects exposing
            ``mode`` and ``convert``) in any mode are accepted and are
            normalised to RGB first; an RGB array-like is used as-is.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is the value
        rounded to two decimals, as a string, and ``confidence`` is the fixed
        value ``90.0``. If nothing plausible is recognised, or any step
        raises, the result is ``("Not detected", 0.0)``; this function does
        not propagate exceptions.
    """
    try:
        # Grayscale ("L"), palette ("P"), 1-bit ("1") and similar modes do not
        # yield an (H, W, 3) array, which the RGB->BGR conversion below
        # requires. Normalise such inputs to RGB instead of failing on them.
        mode = getattr(pil_img, "mode", None)
        if mode is not None and mode != "RGB":
            pil_img = pil_img.convert("RGB")

        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        processed = preprocess_strong(img)

        for config in _OCR_CONFIGS:
            raw_text = pytesseract.image_to_string(processed, config=config)
            logging.info("[Tesseract Output %s] Raw text: %s", config, raw_text)

            weight = _parse_weight_text(raw_text)
            if weight is not None:
                return weight, 90.0

        return "Not detected", 0.0
    except Exception as e:
        # Best-effort contract: callers always get a result tuple, so every
        # failure (bad input, missing Tesseract binary, OpenCV error) is
        # logged and mapped to the "not detected" sentinel.
        logging.error("OCR failed: %s", e)
        return "Not detected", 0.0