logger / weight_detector.py
Sanjayraju30's picture
Rename ocr_engine.py to weight_detector.py
ee68036 verified
raw
history blame
3.68 kB
import cv2
import numpy as np
import easyocr
import re
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw
class WeightDetector:
    """Read a weight value from a photo using EasyOCR.

    The detector runs OCR on an image, extracts numeric readings (optionally
    followed by a weight unit) from each recognised text fragment, and returns
    the reading with the highest OCR confidence together with an annotated
    copy of the image.
    """

    # Patterns are tried in order; the first one that matches wins.
    #   1. decimal followed by a unit   (12.34g, 56.78 kg, .5 lb)
    #   2. integer followed by a unit   (90 lbs)
    #   3. bare decimal                 (12.34)
    #   4. bare integer                 (42)
    # A leading-dot decimal (".5") is accepted only when the dot is not glued
    # to a preceding word character or another dot, so "No.5" still reads as 5.
    # Unit alternatives are listed longest-first so "grams" / "lbs" are
    # captured whole rather than as "g" / "lb".
    _WEIGHT_PATTERNS = (
        re.compile(
            r'(\d+\.\d+|(?<![\w.])\.\d+)\s*(kilograms|grams|pounds|kg|lbs|lb|g)',
            re.IGNORECASE,
        ),
        re.compile(
            r'(\d+)\s*(kilograms|grams|pounds|kg|lbs|lb|g)',
            re.IGNORECASE,
        ),
        re.compile(r'(\d+\.\d+|(?<![\w.])\.\d+)'),
        re.compile(r'(\d+)'),
    )

    def __init__(self):
        """Initialize the OCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and binarize it for OCR.

        Args:
            image_path: Path of the image file to read.

        Returns:
            A single-channel image produced by Gaussian adaptive thresholding
            of the grayscale input.

        Raises:
            ValueError: If OpenCV cannot read an image from ``image_path``.
        """
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError("Could not read image from path")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Block size 11 and constant 2 are the neighbourhood size and the
        # offset subtracted from the weighted mean, respectively.
        return cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

    def _parse_weight(self, text: str) -> Tuple[Optional[float], Optional[str]]:
        """Return ``(value, unit)`` parsed from ``text``.

        ``unit`` is the lower-cased unit string when one directly follows the
        number, otherwise ``None``. Returns ``(None, None)`` when ``text``
        contains no number.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            # Only the unit-bearing patterns have a second group.
            unit = match.group(2).lower() if pattern.groups > 1 else None
            return float(match.group(1)), unit
        return None, None

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a numeric weight from ``text``.

        A number followed by a weight unit (g, kg, grams, kilograms, lb, lbs,
        pounds) is preferred; otherwise the first decimal, then the first
        integer, is used.

        Args:
            text: Text fragment, typically one OCR result.

        Returns:
            The parsed number as a float, or ``None`` if ``text`` contains no
            digits.
        """
        value, _unit = self._parse_weight(text)
        return value

    def detect_weight(self, image_path: str) -> "Tuple[Optional[float], List[dict], Image.Image]":
        """Detect a weight reading in an image.

        OCR is run on the image as loaded; ``preprocess_image`` is not applied
        here.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A tuple ``(weight, detections, image)``:

            * ``weight`` - value of the detection with the highest OCR
              probability, or ``None`` if nothing was detected.
            * ``detections`` - list of dicts with keys ``'weight'``,
              ``'unit'``, ``'text'``, ``'probability'`` and ``'bbox'``,
              sorted by descending probability.
            * ``image`` - RGB copy of the input with every detection outlined
              and labelled in red. On any error a blank white 100x100 image
              is returned instead, with ``weight`` ``None`` and an empty list.
        """
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the converted image.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            results = self.reader.readtext(img_cv)

            # Keep every OCR fragment that contains a number.
            detected_weights = []
            for bbox, text, prob in results:
                weight, unit = self._parse_weight(text)
                if weight is None:
                    continue
                detected_weights.append({
                    'weight': weight,
                    'unit': unit,
                    'text': text,
                    'probability': prob,
                    'bbox': bbox,
                })

            if not detected_weights:
                return None, [], img

            # Most confident detection first.
            detected_weights.sort(key=lambda x: x['probability'], reverse=True)
            best_match = detected_weights[0]

            draw = ImageDraw.Draw(img)
            for item in detected_weights:
                polygon = [(int(x), int(y)) for x, y in item['bbox']]
                draw.polygon(polygon, outline="red", width=2)

                # Show the unit that was actually read; default to grams when
                # the OCR text carried no recognisable unit.
                suffix = item['unit'] or 'g'
                label = f"{item['weight']}{suffix} (p={item['probability']:.2f})"

                # Place the label just above the box, clamped so it stays on
                # the canvas for boxes touching the top edge.
                label_x = polygon[0][0]
                label_y = max(0, polygon[0][1] - 10)
                draw.text((label_x, label_y), label, fill="red")

            return best_match['weight'], detected_weights, img
        except Exception as e:
            # Best-effort boundary: report the problem and hand back a
            # placeholder so callers always receive an image.
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")