Spaces:
Running
Running
Rename ocr_engine.py to weight_detector.py
Browse files- ocr_engine.py +0 -95
- weight_detector.py +94 -0
ocr_engine.py
DELETED
@@ -1,95 +0,0 @@
|
|
1 |
-
import easyocr
|
2 |
-
import numpy as np
|
3 |
-
import cv2
|
4 |
-
import re
|
5 |
-
from PIL import Image
|
6 |
-
import logging
|
7 |
-
import sys
|
8 |
-
|
9 |
-
# Set up logging: INFO level, timestamped records, emitted on stdout.
# NOTE: this runs at import time and configures the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])

# Initialize EasyOCR reader (once at module level for efficiency).
# The single English-language reader is shared by every call in this module.
reader = easyocr.Reader(['en'], gpu=False) # GPU=False for CPU-only environments like Hugging Face Spaces
|
14 |
-
|
15 |
-
def preprocess_image(img):
    """Binarize a scale photo so the digits stand out for EasyOCR.

    The image is converted to grayscale, contrast-enhanced with CLAHE and
    then adaptively thresholded (inverted binary, Gaussian weighting).

    Args:
        img: A PIL image, or anything ``np.array`` can turn into a pixel
            array. Colour input is interpreted as RGB; 2-D input is treated
            as already being grayscale.

    Returns:
        The thresholded single-channel array. If contrast enhancement or
        thresholding fails, the plain grayscale array is returned instead.

    Raises:
        Exception: Re-raised unchanged when the input could not even be
            converted to grayscale, because there is nothing to fall back to.
    """
    # Stays None until grayscale conversion succeeds, so the handler below can
    # tell "no fallback available" apart from "thresholding failed".
    gray = None
    try:
        pixels = np.array(img)
        if pixels.ndim == 2:
            # Already single-channel; cvtColor(RGB2BGR) would reject it.
            gray = pixels
        else:
            # PIL delivers RGB, OpenCV works in BGR.
            bgr = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
            gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

        # Enhance contrast for diverse lighting conditions.
        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Neighbourhood size scales with image height, is always odd (as
        # adaptiveThreshold requires) and is clamped to the range 11..31.
        block_size = max(11, min(31, int(gray.shape[0] / 15) * 2 + 1))
        return cv2.adaptiveThreshold(
            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
        )
    except Exception as e:
        logging.error(f"Preprocessing failed: {str(e)}")
        if gray is None:
            # Previously this path died with an UnboundLocalError that hid
            # the real cause; surface the original exception instead.
            raise
        return gray
|
39 |
-
|
40 |
-
def _parse_weight_text(text):
    """Return ``(weight_str, unit)`` parsed from lower-cased OCR text, or ``None``."""
    # Displays using a decimal comma ("68,0") are normalised before matching;
    # doing it afterwards has no effect because the number regex never
    # captures a comma.
    normalized = re.sub(r'(?<=\d),(?=\d)', '.', text)

    # Number, optional whitespace, then an optional "kg"/"g" that is not the
    # beginning of a longer word.
    match = re.search(r'(-?\d*\.?\d+)\s*((?:kg|g)(?![a-z]))?', normalized)
    if match is None:
        return None

    # Drop redundant leading zeros but keep the one in front of a decimal
    # point ("007.5" -> "7.5", "0.5" stays "0.5", "000" -> "0").
    weight_str = re.sub(r'^(-?)0+(?=\d)', r'\1', match.group(1))
    unit = match.group(2) or "g"  # Default to grams if no unit was read
    return weight_str, unit


def extract_weight_from_image(pil_img):
    """Extract weight and unit from a digital scale image using EasyOCR.

    Args:
        pil_img: Image of the scale display; passed to ``preprocess_image``.

    Returns:
        A ``(weight_str, confidence, unit)`` tuple. ``weight_str`` is the
        number as text, ``confidence`` is the lowest EasyOCR confidence among
        the non-empty text fragments expressed as a percentage, and ``unit``
        is ``"kg"`` or ``"g"`` (``"g"`` when no unit was read). When nothing
        usable is found, the value is outside -5000..5000, or any step
        raises, ``("Not detected", 0.0, "")`` is returned.
    """
    try:
        thresh = preprocess_image(pil_img)

        results = reader.readtext(np.array(thresh), detail=1, paragraph=False)
        logging.info(f"EasyOCR raw output: {results}")

        # Combine every detected fragment into one normalised, lower-case line.
        text = " ".join(result[1] for result in results).strip().lower()
        text = re.sub(r'\s+', ' ', text)

        parsed = _parse_weight_text(text)
        if parsed is not None:
            weight_str, unit = parsed
            weight = float(weight_str)
            if -5000 <= weight <= 5000:
                # Convert EasyOCR confidence (0-1) to a percentage; the
                # weakest fragment bounds the overall confidence.
                confidence = min([result[2] for result in results if result[1]] or [0.0]) * 100
                logging.info(f"Detected weight: {weight} {unit}, Confidence: {confidence:.2f}%")
                return weight_str, confidence, unit

        logging.info("No valid weight detected.")
        return "Not detected", 0.0, ""
    except Exception as e:
        # Best-effort API: callers always get the sentinel tuple, never an exception.
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
weight_detector.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
import easyocr
|
4 |
+
import re
|
5 |
+
from typing import Tuple, List, Optional
|
6 |
+
from PIL import Image, ImageDraw
|
7 |
+
|
8 |
+
class WeightDetector:
    """Find and read a weight value in a photo using EasyOCR."""

    # Tried in order: a number with a unit beats a bare number, and a decimal
    # beats an integer. Longer unit names come first and a word boundary
    # follows, so "grams" is not captured as "g" and "good" is not a unit.
    # "[.,]" accepts displays that use a decimal comma ("68,0").
    _WEIGHT_PATTERNS = (
        re.compile(r'(\d+[.,]\d+)\s*(kilograms|grams|pounds|lbs|lb|kg|g)\b', re.IGNORECASE),
        re.compile(r'(\d+)\s*(kilograms|grams|pounds|lbs|lb|kg|g)\b', re.IGNORECASE),
        re.compile(r'(\d+[.,]\d+)'),  # Just numbers with decimal
        re.compile(r'(\d+)'),  # Just whole numbers
    )

    def __init__(self, languages: Optional[List[str]] = None, gpu: bool = True):
        """Initialize the OCR reader.

        Args:
            languages: EasyOCR language codes; defaults to English only.
            gpu: Forwarded to ``easyocr.Reader``. Pass ``False`` on CPU-only
                hosts.
        """
        self.reader = easyocr.Reader(list(languages) if languages else ['en'], gpu=gpu)

    def preprocess_image(self, image_path: str) -> "np.ndarray":
        """Load an image from disk and binarize it for OCR.

        The image is converted to grayscale and adaptively thresholded
        (Gaussian weighting, block size 11, constant 2). ``detect_weight``
        does not call this method; it runs OCR on the unprocessed image.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The thresholded single-channel image.

        Raises:
            ValueError: If OpenCV could not read the file.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image from path")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

    @staticmethod
    def _parse_weight(text: str) -> Optional[Tuple[float, Optional[str]]]:
        """Return ``(value, unit)`` for the first matching pattern, else ``None``.

        ``unit`` is the lower-cased unit as written in the text, or ``None``
        when only a bare number was found.
        """
        if not text:
            return None
        for pattern in WeightDetector._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match:
                value = float(match.group(1).replace(',', '.'))
                unit = match.group(2).lower() if pattern.groups > 1 else None
                return value, unit
        return None

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a weight value from text such as "12.34g" or "56.78 kg".

        Args:
            text: One fragment of OCR output.

        Returns:
            The number as a float, or ``None`` when the text has no digits.
        """
        parsed = self._parse_weight(text)
        return None if parsed is None else parsed[0]

    def detect_weight(self, image_path: str) -> Tuple[Optional[float], List[dict], "Image.Image"]:
        """Detect a weight in an image file.

        Args:
            image_path: Path of the image to analyse.

        Returns:
            A ``(weight, detections, image)`` tuple. ``detections`` holds one
            dict per OCR fragment containing a number (keys ``weight``,
            ``unit``, ``text``, ``probability``, ``bbox``), sorted by
            descending probability; ``weight`` is the value of the first
            entry; ``image`` is the RGB image with every detection outlined
            and labelled in red. When no number is found the result is
            ``(None, [], image)`` with the unannotated image. If processing
            raises, the error is printed and ``(None, [], blank)`` is
            returned, where ``blank`` is a white 100x100 image.
        """
        try:
            # convert() yields an independent copy, so the file handle can be
            # released straight away.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            results = self.reader.readtext(img_cv)

            detected_weights = []
            for bbox, text, prob in results:
                parsed = self._parse_weight(text)
                if parsed is None:
                    continue
                weight, unit = parsed
                detected_weights.append({
                    'weight': weight,
                    'unit': unit,
                    'text': text,
                    'probability': prob,
                    'bbox': bbox,
                })

            if not detected_weights:
                return None, [], img

            # Most confident reading first; it becomes the reported weight.
            detected_weights.sort(key=lambda item: item['probability'], reverse=True)

            draw = ImageDraw.Draw(img)
            for item in detected_weights:
                polygon = [(int(x), int(y)) for x, y in item['bbox']]
                draw.polygon(polygon, outline="red", width=2)
                # Label with the unit that was actually read; a bare number
                # keeps the historical "g" suffix.
                label = f"{item['weight']}{item['unit'] or 'g'} (p={item['probability']:.2f})"
                # Clamp so labels of boxes touching the top edge stay visible.
                label_y = max(0, polygon[0][1] - 10)
                draw.text((polygon[0][0], label_y), label, fill="red")

            return detected_weights[0]['weight'], detected_weights, img

        except Exception as e:
            # Best-effort API: report the problem and hand back a placeholder.
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")
|