import cv2
import numpy as np
import easyocr
import re
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw
import pytz
from datetime import datetime

class WeightDetector:
    def __init__(self):
        """Initialize with English and optimized settings"""
        self.reader = easyocr.Reader(
            ['en'],
            gpu=True,
            model_storage_directory='model',
            download_enabled=True
        )
        self.ist = pytz.timezone('Asia/Kolkata')

    def get_current_ist(self) -> str:
        """Get current time in Indian Standard Time"""
        return datetime.now(self.ist).strftime('%Y-%m-%d %H:%M:%S %Z')

    def preprocess_image(self, image: np.ndarray) -> np.ndarray:
        """Enhanced image preprocessing for digital displays"""
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Contrast enhancement
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        contrast_enhanced = clahe.apply(gray)
        # Thresholding for digital displays
        _, thresh = cv2.threshold(contrast_enhanced, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Noise reduction
        denoised = cv2.medianBlur(thresh, 3)
        return denoised
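    # Pipeline summary: grayscale -> CLAHE contrast boost -> Otsu binarisation
    # -> 3x3 median blur. The clipLimit and blur kernel are plausible defaults
    # for digital displays and may need tuning for other lighting conditions.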

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a weight value in grams from OCR text via pattern matching"""
        # Clean the text: strip spaces, normalise decimal commas, lowercase
        text = text.replace(' ', '').replace(',', '.').lower()
        # Patterns for digital scale displays, most specific first
        patterns = [
            r'(\d+\.\d+)[gkl]',  # 12.34g or 12.34kg
            r'(\d+)[gkl]',       # 123g or 123kg
            r'(\d+\.\d+)',       # just numbers with decimal
            r'(\d+)'             # just whole numbers
        ]
        for pattern in patterns:
            match = re.search(pattern, text)
            if match:
                try:
                    value = float(match.group(1))
                    # Convert kg to g; if no unit is present, assume grams
                    if 'k' in text:
                        return value * 1000
                    return value
                except ValueError:
                    continue
        return None
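    # Illustrative behaviour, assuming the patterns above:
    #   extract_weight_value("1.250kg") -> 1250.0
    #   extract_weight_value("750g")    -> 750.0
    #   extract_weight_value("0.5")     -> 0.5   (no unit: grams assumed)
    #   extract_weight_value("ERR")     -> None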

    def detect_weight(self, image_path: str) -> Tuple[Optional[float], str, Image.Image]:
        """Detect the weight shown in an image; returns (weight_in_grams, ist_timestamp, annotated_image)"""
        try:
            # Read image
            img = Image.open(image_path).convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            # Preprocess
            processed = self.preprocess_image(img_cv)
            # OCR with configuration optimized for digital displays
            results = self.reader.readtext(
                processed,
                paragraph=False,
                detail=1,
                allowlist='0123456789.gkKGlL',  # digits, '.', and unit letters in both cases
                width_ths=2.0,
                text_threshold=0.7
            )
            # Process all potential weight values
            detected_weights = []
            for (bbox, text, prob) in results:
                weight = self.extract_weight_value(text)
                if weight is not None and prob > 0.4:  # Minimum confidence
                    detected_weights.append({
                        'weight': weight,
                        'text': text,
                        'probability': prob,
                        'bbox': bbox
                    })
            # Prepare output
            draw = ImageDraw.Draw(img)
            current_time = self.get_current_ist()
            if detected_weights:
                # Sort by probability and area (larger text is more likely the weight)
                detected_weights.sort(
                    key=lambda x: (
                        x['probability'],
                        (x['bbox'][2][0] - x['bbox'][0][0]) *  # width
                        (x['bbox'][2][1] - x['bbox'][0][1])    # height
                    ),
                    reverse=True
                )
                best_match = detected_weights[0]
                # Draw all detections
                for item in detected_weights:
                    bbox = item['bbox']
                    polygon = [(int(x), int(y)) for [x, y] in bbox]
                    color = "green" if item == best_match else "red"
                    draw.polygon(polygon, outline=color, width=2)
                    label = f"{item['weight']}g (p={item['probability']:.2f})"
                    draw.text((polygon[0][0], polygon[0][1] - 15), label, fill=color)
                # Add timestamp to image
                draw.text((10, 10), f"Captured at: {current_time}", fill="blue")
                return best_match['weight'], current_time, img
            # No weight detected
            draw.text((10, 10), f"Captured at: {current_time}", fill="blue")
            return None, current_time, img
        except Exception as e:
            # On failure, return a small placeholder image describing the error
            current_time = self.get_current_ist()
            error_img = Image.new("RGB", (300, 100), color="white")
            draw = ImageDraw.Draw(error_img)
            draw.text((10, 10), f"Error: {str(e)}", fill="red")
            draw.text((10, 30), f"Time: {current_time}", fill="blue")
            return None, current_time, error_img
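
# Minimal usage sketch, assuming a placeholder input image "scale_photo.jpg"
# and output filename "annotated_output.png". With gpu=True above, EasyOCR
# falls back to CPU automatically if no CUDA device is available.
if __name__ == "__main__":
    detector = WeightDetector()
    weight, timestamp, annotated = detector.detect_weight("scale_photo.jpg")
    if weight is not None:
        print(f"Detected weight: {weight} g at {timestamp}")
    else:
        print(f"No weight detected at {timestamp}")
    annotated.save("annotated_output.png")  # image with boxes, labels, timestamp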