Spaces:
Sleeping
Sleeping
import re
from typing import Tuple, List, Optional

import cv2
import easyocr
import numpy as np
from PIL import Image, ImageDraw
class WeightDetector:
    """Locate and read a weight value in a photo using EasyOCR.

    The detector runs OCR over an image, keeps every recognised text
    fragment that contains a number, and reports the fragment with the
    highest OCR confidence as the detected weight.
    """

    # Unit spellings accepted directly after a number. Longer spellings come
    # first and every pattern ends in ``\b`` so that a word merely starting
    # with a unit letter ("gifts", "gallons") is not read as grams.
    _UNIT_GROUP = r'(kilograms?|grams?|pounds?|kgs?|lbs?|g)'

    # Tried in order; the first pattern that matches wins. Numbers followed
    # by a unit are preferred over bare numbers, decimals over integers.
    _WEIGHT_PATTERNS = (
        re.compile(r'(\d+\.\d+)\s*' + _UNIT_GROUP + r'\b', re.IGNORECASE),
        re.compile(r'(\d+)\s*' + _UNIT_GROUP + r'\b', re.IGNORECASE),
        re.compile(r'(\d+\.\d+)'),  # Just numbers with decimal
        re.compile(r'(\d+)'),       # Just whole numbers
    )

    def __init__(self):
        """Initialize the OCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and binarise it for OCR.

        The image is converted to grayscale and passed through Gaussian
        adaptive thresholding (block size 11, constant 2). This helper is
        standalone: ``detect_weight`` does not call it and runs OCR on the
        unthresholded image.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The thresholded single-channel image.

        Raises:
            ValueError: If OpenCV could not read an image from the path.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image from path")
        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Apply adaptive thresholding
        processed = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        return processed

    def _parse_weight(self, text: str) -> Optional[Tuple[float, Optional[str]]]:
        """Return ``(value, unit)`` for the first weight-like number in text.

        ``unit`` is the lower-cased unit spelling that followed the number,
        or None when only a bare number was found. Returns None when the
        text contains no number at all.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            # Only the unit-qualified patterns define a second group.
            unit = match.group(2).lower() if pattern.groups > 1 else None
            return float(match.group(1)), unit
        return None

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a weight value from text using regex patterns.

        Recognises forms such as ``12.34g``, ``56.78 kg`` and ``90 lbs``,
        falling back to the first bare number when no unit is present.

        Args:
            text: Text fragment to search.

        Returns:
            The numeric value as a float, or None if the text has no number.
        """
        parsed = self._parse_weight(text)
        return None if parsed is None else parsed[0]

    def _annotate(self, img, detections: List[dict]) -> None:
        """Draw each detection's outline and a value/confidence label on img."""
        draw = ImageDraw.Draw(img)
        for item in detections:
            # OCR corner points -> integer pixel tuples for PIL.
            polygon = [(int(x), int(y)) for x, y in item['bbox']]
            draw.polygon(polygon, outline="red", width=2)
            # Show the unit that was actually read; a bare number gets none
            # rather than being presented as grams.
            if item['unit']:
                reading = f"{item['weight']} {item['unit']}"
            else:
                reading = f"{item['weight']}"
            label = f"{reading} (p={item['probability']:.2f})"
            left, top = polygon[0]
            # Keep the label inside the image when the box touches the top.
            draw.text((left, max(top - 10, 0)), label, fill="red")

    def detect_weight(self, image_path: str) -> "Tuple[Optional[float], List[dict], Image.Image]":
        """Detect a weight in an image.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A ``(weight, detections, image)`` tuple:

            * ``weight`` - value of the detection with the highest OCR
              confidence, or None when nothing numeric was recognised.
            * ``detections`` - one dict per numeric text fragment, sorted by
              descending confidence, with keys ``weight``, ``unit``,
              ``text``, ``probability`` and ``bbox``.
            * ``image`` - RGB copy of the input with every detection
              outlined and labelled. When nothing was detected it is the
              unannotated image.

            Any failure is reported on stdout and yields
            ``(None, [], <blank 100x100 white image>)`` instead of raising.
        """
        try:
            # Close the file handle as soon as the pixels are loaded;
            # convert() returns an independent in-memory image.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            # Perform OCR
            results = self.reader.readtext(img_cv)

            # Keep every fragment that contains a number
            detected_weights = []
            for bbox, text, prob in results:
                parsed = self._parse_weight(text)
                if parsed is None:
                    continue
                weight, unit = parsed
                detected_weights.append({
                    'weight': weight,
                    'unit': unit,
                    'text': text,
                    'probability': prob,
                    'bbox': bbox,
                })

            if not detected_weights:
                return None, [], img

            # Most confident detection first
            detected_weights.sort(key=lambda item: item['probability'], reverse=True)
            self._annotate(img, detected_weights)
            return detected_weights[0]['weight'], detected_weights, img
        except Exception as e:
            # Deliberate best-effort boundary: callers always get a 3-tuple.
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")