Spaces:
Sleeping
Sleeping
import easyocr | |
import numpy as np | |
import cv2 | |
import re | |
from PIL import Image | |
import logging | |
import sys | |
# Logging: emit records of level INFO and above to stdout, each prefixed
# with a timestamp and the level name.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    level=logging.INFO,
    format=_LOG_FORMAT,
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Build the English EasyOCR reader a single time at import so every call
# reuses it. gpu=False keeps it on the CPU (intended for CPU-only hosts such
# as Hugging Face Spaces).
reader = easyocr.Reader(['en'], gpu=False)
def _luma_from_array(pixels):
    """Collapse an image array to a single channel using NumPy only.

    Last-resort grayscale conversion for the case where the OpenCV
    conversion is the step that failed.

    Args:
        pixels: ``numpy.ndarray`` of shape (H, W) or (H, W, C) with C >= 3.
            The first three channels are treated as R, G, B.

    Returns:
        The input unchanged when it is already 2-D; otherwise a 2-D
        ``uint8`` array of weighted channel sums, clipped to 0..255.

    Raises:
        ValueError: If the array is neither 2-D nor 3-D with >= 3 channels.
    """
    if pixels.ndim == 2:
        return pixels
    if pixels.ndim == 3 and pixels.shape[2] >= 3:
        # Same channel weights OpenCV documents for RGB -> gray.
        weights = np.array([0.299, 0.587, 0.114])
        luma = pixels[..., :3].astype(np.float64) @ weights
        return np.clip(np.rint(luma), 0, 255).astype(np.uint8)
    raise ValueError(f"Unsupported image shape: {pixels.shape}")


def preprocess_image(img):
    """Preprocess an image for OCR of weight displays.

    Pipeline: grayscale -> CLAHE contrast enhancement -> inverted Gaussian
    adaptive threshold.

    Args:
        img: A PIL image or anything ``numpy.array`` can turn into an
            (H, W) grayscale or (H, W, 3|4) RGB/RGBA array.

    Returns:
        The thresholded single-channel image on success. If a later stage
        fails, the best image obtained so far is returned instead: the
        grayscale image, or a NumPy-computed grayscale when the OpenCV
        conversion itself failed.

    Raises:
        Exception: Re-raises the original error if the input could not even
            be converted to an array; ``ValueError`` if the array has a
            shape no fallback can handle.
    """
    # Both start as None so the except-handler can tell how far we got.
    # (Previously the handler returned a possibly-unbound ``gray``.)
    pixels = None
    gray = None
    try:
        pixels = np.array(img)

        if pixels.ndim == 2:
            # Already single-channel (e.g. PIL mode "L"); nothing to convert.
            gray = pixels
        elif pixels.ndim == 3 and pixels.shape[2] == 4:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
        else:
            # PIL arrays are RGB-ordered; converting straight to gray gives
            # the same result as going through BGR first.
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

        # Enhance contrast for diverse lighting conditions.
        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Neighbourhood size scales with image height, clamped to [11, 31];
        # the "* 2 + 1" keeps it odd, as adaptiveThreshold requires.
        block_size = max(11, min(31, int(gray.shape[0] / 15) * 2 + 1))
        return cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            block_size,
            2,
        )
    except Exception as e:
        logging.error(f"Preprocessing failed: {str(e)}")
        if gray is not None:
            return gray
        if pixels is not None:
            return _luma_from_array(pixels)
        # Nothing usable was produced; let the caller see the real error.
        raise
# A signed decimal number, optionally followed (with or without whitespace)
# by a "kg" or "g" unit that ends on a word boundary.
_WEIGHT_PATTERN = re.compile(r'(-?\d*\.?\d+)(?:\s*(kg|g)\b)?')


def parse_weight_text(text, min_weight=-5000, max_weight=5000):
    """Find the first plausible weight reading in OCR text.

    Args:
        text: Raw text from OCR (any case, arbitrary spacing).
        min_weight: Smallest accepted numeric value (inclusive).
        max_weight: Largest accepted numeric value (inclusive).

    Returns:
        ``(weight_str, unit)`` for the first number within range, where
        ``weight_str`` has redundant leading zeros removed but keeps one
        integer digit (``"0.5"``, not ``".5"``) and ``unit`` is ``"kg"`` or
        ``"g"`` (``"g"`` when no unit follows the number). ``None`` when no
        number in range is found.
    """
    cleaned = re.sub(r'\s+', ' ', text.strip().lower())
    # Treat a comma between digits as a decimal separator (e.g. "68,0").
    # This must happen before matching, since the pattern has no comma.
    cleaned = re.sub(r'(?<=\d),(?=\d)', '.', cleaned)

    for found in _WEIGHT_PATTERN.finditer(cleaned):
        raw_number = found.group(1)
        try:
            value = float(raw_number)
        except ValueError:
            logging.warning(f"Invalid weight format: {raw_number}")
            continue
        if not min_weight <= value <= max_weight:
            # Out of range: keep scanning instead of giving up.
            continue

        sign = '-' if raw_number.startswith('-') else ''
        whole, dot, fraction = raw_number.lstrip('-').partition('.')
        whole = whole.lstrip('0') or '0'
        return f"{sign}{whole}{dot}{fraction}", found.group(2) or "g"
    return None


def extract_weight_from_image(pil_img):
    """Extract weight and unit from a digital scale image using EasyOCR.

    Args:
        pil_img: Image of the scale display; passed to ``preprocess_image``.

    Returns:
        ``(weight_str, confidence, unit)``. ``confidence`` is the lowest
        confidence among the non-empty OCR detections, scaled to a
        percentage. On any failure, or when no in-range number is read,
        returns ``("Not detected", 0.0, "")``.
    """
    try:
        processed = preprocess_image(pil_img)
        results = reader.readtext(np.asarray(processed), detail=1, paragraph=False)
        logging.info(f"EasyOCR raw output: {results}")

        # Each result is indexed as [1] = text, [2] = confidence (0-1).
        combined_text = " ".join(entry[1] for entry in results)
        parsed = parse_weight_text(combined_text)
        if parsed is None:
            logging.info("No valid weight detected.")
            return "Not detected", 0.0, ""

        weight_str, unit = parsed
        scores = [entry[2] for entry in results if entry[1]] or [0.0]
        # Lowest score is used, as the number and unit may be separate detections.
        confidence = float(min(scores)) * 100
        logging.info(
            f"Detected weight: {float(weight_str)} {unit}, Confidence: {confidence:.2f}%"
        )
        return weight_str, confidence, unit
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0, ""