import re
from typing import List, Optional, Tuple

import cv2
import easyocr
import numpy as np
from PIL import Image, ImageDraw


class WeightDetector:
    """Detect weight readings in images using EasyOCR text recognition."""

    # Tried in order; the first pattern that matches anywhere in the text wins.
    # Common weight patterns: 12.34g, 56.78 kg, 90.12 lbs, etc., then bare
    # decimals, then bare whole numbers. Compiled once at class level so the
    # patterns are not rebuilt on every call.
    _WEIGHT_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'(\d+\.\d+)\s*(g|kg|grams|kilograms|lb|lbs|pounds)',
            r'(\d+)\s*(g|kg|grams|kilograms|lb|lbs|pounds)',
            r'(\d+\.\d+)',  # Just numbers with decimal
            r'(\d+)',  # Just whole numbers
        )
    )

    def __init__(self):
        """Initialize the OCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and binarize it for better OCR results.

        The image is read with OpenCV, converted to grayscale and passed
        through Gaussian adaptive thresholding (block size 11, constant 2).

        Note: ``detect_weight`` does not call this method; it is available
        for callers that want the thresholded image.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The thresholded single-channel image.

        Raises:
            ValueError: If OpenCV cannot read an image from ``image_path``.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image from path")

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding
        processed = cv2.adaptiveThreshold(
            gray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )
        return processed

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a numeric weight value from text using regex patterns.

        Patterns are tried in priority order: decimal followed by a unit,
        integer followed by a unit, bare decimal, bare integer. Only the
        number is returned; the matched unit is discarded and no unit
        conversion is performed.

        Args:
            text: Text to search (for example one OCR result string).

        Returns:
            The first matching number as a float, or ``None`` when the text
            contains no number.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match:
                try:
                    return float(match.group(1))
                except ValueError:
                    continue
        return None

    def detect_weight(
        self, image_path: str
    ) -> Tuple[Optional[float], List[dict], Image.Image]:
        """Detect a weight in an image and return value, metadata and image.

        Runs OCR on the image, keeps every text fragment from which
        ``extract_weight_value`` can pull a number, and outlines each of
        them in red on a copy of the image.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A tuple ``(weight, candidates, image)``:

            * ``weight`` is the value of the candidate with the highest OCR
              probability, or ``None`` when nothing was detected.
            * ``candidates`` is a list of dicts with keys ``'weight'``,
              ``'text'``, ``'probability'`` and ``'bbox'``, sorted by
              descending probability (empty when nothing was detected).
            * ``image`` is the RGB image, annotated when candidates exist.

            If any exception occurs it is printed and
            ``(None, [], <blank white 100x100 image>)`` is returned instead
            of raising.
        """
        try:
            # Load the image as RGB. The context manager closes the
            # underlying file; convert() returns an independent in-memory
            # image that stays valid afterwards.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            # Perform OCR
            results = self.reader.readtext(img_cv)

            # Collect every OCR fragment that yields a weight value
            detected_weights = []
            for (bbox, text, prob) in results:
                weight = self.extract_weight_value(text)
                if weight is not None:
                    detected_weights.append({
                        'weight': weight,
                        'text': text,
                        'probability': prob,
                        'bbox': bbox,
                    })

            if not detected_weights:
                return None, [], img

            # Sort by probability and get the highest
            detected_weights.sort(key=lambda x: x['probability'], reverse=True)
            best_match = detected_weights[0]

            # Draw bounding boxes on image
            draw = ImageDraw.Draw(img)
            for item in detected_weights:
                # Convert bbox coordinates to a list of integer (x, y) tuples
                polygon = [(int(x), int(y)) for [x, y] in item['bbox']]
                draw.polygon(polygon, outline="red", width=2)

                # Add text label.
                # NOTE(review): the "g" suffix is hard-coded even when the
                # OCR text carried another unit (kg, lbs, ...) - confirm
                # whether the label should reflect the matched unit.
                label = f"{item['weight']}g (p={item['probability']:.2f})"
                label_x = polygon[0][0]
                # Clamp so labels for boxes touching the top edge are not
                # drawn above the canvas.
                label_y = max(0, polygon[0][1] - 10)
                draw.text((label_x, label_y), label, fill="red")

            return best_match['weight'], detected_weights, img

        except Exception as e:
            # Deliberate best-effort boundary: report and return a placeholder
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")