Spaces:
Sleeping
Sleeping
File size: 3,684 Bytes
ee68036 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import cv2
import numpy as np
import easyocr
import re
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw
class WeightDetector:
    """Find weight readings in an image using EasyOCR plus regex matching.

    The OCR reader is created once in ``__init__`` and reused by
    ``detect_weight`` for every image.
    """

    # Patterns are tried in order; the first one that matches wins.
    # The trailing ``\b`` after the unit keeps a short unit such as "g" or
    # "lb" from matching the first letters of an unrelated word
    # ("1.5 gallons", "2 gauges"), which would otherwise pick the wrong number
    # when the text contains several.
    _WEIGHT_PATTERNS = (
        # Decimal number followed by a unit: 12.34g, 56.78 kg, 90.12 lbs
        re.compile(
            r'(\d+\.\d+)\s*(?:g|kg|grams|kilograms|lb|lbs|pounds)\b',
            re.IGNORECASE,
        ),
        # Whole number followed by a unit: 12g, 56 kg
        re.compile(
            r'(\d+)\s*(?:g|kg|grams|kilograms|lb|lbs|pounds)\b',
            re.IGNORECASE,
        ),
        # Fallbacks: any decimal number, then any whole number
        re.compile(r'(\d+\.\d+)', re.IGNORECASE),
        re.compile(r'(\d+)', re.IGNORECASE),
    )

    def __init__(self):
        """Initialize the OCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and binarize it for OCR.

        The image is read with OpenCV, converted to grayscale and passed
        through Gaussian adaptive thresholding (block size 11, constant 2).
        Note that ``detect_weight`` does not call this method; it runs OCR on
        the unprocessed image.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The thresholded single-channel image.

        Raises:
            ValueError: If OpenCV cannot read the image at ``image_path``.
        """
        img = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise ValueError("Could not read image from path")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        processed = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        return processed

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a numeric weight value from a piece of text.

        A number followed by a recognised unit (g, kg, grams, kilograms, lb,
        lbs, pounds; case-insensitive) is preferred, decimals before whole
        numbers. If no such number exists, the first decimal number and then
        the first whole number in the text is used. The unit itself is not
        returned and no unit conversion is performed.

        Args:
            text: Text to search, e.g. one OCR result string.

        Returns:
            The matched number as a float, or None if ``text`` contains no
            digits.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match:
                try:
                    return float(match.group(1))
                except ValueError:
                    # Defensive: move on to the next pattern if the matched
                    # digits cannot be converted.
                    continue
        return None

    def detect_weight(self, image_path: str) -> Tuple[Optional[float], List[dict], "Image.Image"]:
        """Detect a weight in an image and annotate every candidate found.

        Every OCR text region that yields a number is recorded as a candidate
        and outlined in red on the image. The candidate with the highest OCR
        probability is reported as the detected weight.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A tuple ``(weight, candidates, image)``:

            * ``weight`` - value of the most probable candidate, or None if
              no candidate was found or an error occurred.
            * ``candidates`` - list of dicts with keys ``'weight'``,
              ``'text'``, ``'probability'`` and ``'bbox'``, sorted by
              descending probability (empty when nothing was found).
            * ``image`` - the RGB image with candidates drawn on it; the
              unmodified image when nothing was found; a blank white
              100x100 image when processing failed.
        """
        try:
            # Load as RGB for drawing; OCR is run on a BGR copy of the
            # unprocessed image.
            img = Image.open(image_path).convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            results = self.reader.readtext(img_cv)

            # Keep every OCR region from which a number can be extracted.
            detected_weights = []
            for (bbox, text, prob) in results:
                weight = self.extract_weight_value(text)
                if weight is not None:
                    detected_weights.append({
                        'weight': weight,
                        'text': text,
                        'probability': prob,
                        'bbox': bbox
                    })

            if not detected_weights:
                return None, [], img

            # Most confident OCR result first.
            detected_weights.sort(key=lambda x: x['probability'], reverse=True)
            best_match = detected_weights[0]

            draw = ImageDraw.Draw(img)
            for item in detected_weights:
                # The drawing call needs integer (x, y) tuples.
                polygon = [(int(x), int(y)) for x, y in item['bbox']]
                draw.polygon(polygon, outline="red", width=2)

                # NOTE(review): the label always appends "g", although the
                # matched text may have used kg/lb - confirm this is intended.
                label = f"{item['weight']}g (p={item['probability']:.2f})"
                # Place the label just above the first corner, clamped so it
                # stays on the canvas when the box touches the top edge.
                label_y = max(polygon[0][1] - 10, 0)
                draw.text((polygon[0][0], label_y), label, fill="red")

            return best_match['weight'], detected_weights, img
        except Exception as e:
            # Best-effort boundary: report the problem and hand back a
            # placeholder so callers always receive the same tuple shape.
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")