Sanjayraju30 committed on
Commit
ee68036
·
verified ·
1 Parent(s): f4861ec

Rename ocr_engine.py to weight_detector.py

Browse files
Files changed (2) hide show
  1. ocr_engine.py +0 -95
  2. weight_detector.py +94 -0
ocr_engine.py DELETED
@@ -1,95 +0,0 @@
1
- import easyocr
2
- import numpy as np
3
- import cv2
4
- import re
5
- from PIL import Image
6
- import logging
7
- import sys
8
-
9
# Route INFO-and-above log records to stdout with a timestamped format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)

# Build the EasyOCR reader a single time at import so every call shares it.
# gpu=False keeps it on the CPU (e.g. for CPU-only hosts such as Hugging Face Spaces).
reader = easyocr.Reader(['en'], gpu=False)
14
-
15
def preprocess_image(img):
    """Preprocess an image for OCR with EasyOCR, tuned for weight displays.

    The image is converted to grayscale, contrast-enhanced with CLAHE and
    binarised with an inverted Gaussian adaptive threshold.

    Args:
        img: A PIL image or any array-like accepted by ``np.array``. Colour
            input is treated as RGB / RGBA channel order; 2-D input is
            treated as already grayscale.

    Returns:
        The thresholded image. If a step after the grayscale conversion
        fails, the grayscale image is returned instead as a best effort.

    Raises:
        Exception: Re-raised (after logging) when the failure happens before
            a grayscale image exists, so the real cause reaches the caller
            instead of an ``UnboundLocalError`` from the fallback path.
    """
    gray = None
    try:
        pixels = np.array(img)

        # Go straight to grayscale; RGB -> GRAY gives the same result as the
        # RGB -> BGR -> GRAY round trip, and 2-D input needs no conversion.
        if pixels.ndim == 2:
            gray = pixels
        elif pixels.shape[2] == 4:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
        else:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

        # Enhance contrast for diverse lighting conditions
        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Neighbourhood size scales with image height, clamped to 11..31;
        # the "* 2 + 1" keeps it odd, as adaptiveThreshold requires.
        block_size = max(11, min(31, int(pixels.shape[0] / 15) * 2 + 1))
        thresh = cv2.adaptiveThreshold(
            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
        )

        return thresh  # EasyOCR handles further processing
    except Exception as e:
        logging.error(f"Preprocessing failed: {str(e)}")
        if gray is None:
            # Nothing usable was produced; surface the original error.
            raise
        return gray
39
-
40
def _normalize_weight_string(weight_str):
    """Strip redundant leading zeros while keeping a valid decimal literal."""
    sign = '-' if weight_str.startswith('-') else ''
    digits = weight_str.lstrip('-').lstrip('0')
    # "0.5" must stay "0.5" (not ".5") and "000" must collapse to "0".
    if not digits or digits.startswith('.'):
        digits = '0' + digits
    return sign + digits


def extract_weight_from_image(pil_img):
    """Extract weight and unit from a digital scale image using EasyOCR.

    Args:
        pil_img: Image handed to ``preprocess_image`` before OCR.

    Returns:
        A ``(weight_str, confidence, unit)`` tuple where ``weight_str`` is the
        normalised number as text, ``confidence`` is the lowest EasyOCR
        confidence among the non-empty detections scaled to 0-100, and
        ``unit`` is ``"kg"`` or ``"g"`` (``"g"`` when no unit was read).
        ``("Not detected", 0.0, "")`` is returned when no number within
        -5000..5000 is found or when any step fails.
    """
    try:
        # Preprocess image
        thresh = preprocess_image(pil_img)

        # Convert to numpy array for EasyOCR
        img_np = np.array(thresh)

        # Use EasyOCR to detect text
        results = reader.readtext(img_np, detail=1, paragraph=False)
        logging.info(f"EasyOCR raw output: {results}")

        # Combine all detected text and normalise spacing
        text = " ".join(result[1] for result in results).strip().lower()
        text = re.sub(r'\s+', ' ', text)
        # Treat a comma between digits as a decimal mark (e.g. 68,0) *before*
        # matching, otherwise the number pattern stops at the comma.
        text = re.sub(r'(?<=\d),(?=\d)', '.', text)

        # First number in the text, optionally followed by "kg" or "g".
        match = re.search(r'(-?\d*\.?\d+)(?:\s*(kg|g)\b)?', text)
        if match:
            weight_str = _normalize_weight_string(match.group(1))
            unit = match.group(2) or "g"  # Default to grams if no unit
            # Convert EasyOCR confidence (0-1) to percentage
            confidence = min(
                (result[2] for result in results if result[1]), default=0.0
            ) * 100
            weight = float(weight_str)
            if -5000 <= weight <= 5000:
                logging.info(f"Detected weight: {weight} {unit}, Confidence: {confidence:.2f}%")
                return weight_str, confidence, unit

        logging.info("No valid weight detected.")
        return "Not detected", 0.0, ""
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0, ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
weight_detector.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import easyocr
4
+ import re
5
+ from typing import Tuple, List, Optional
6
+ from PIL import Image, ImageDraw
7
+
8
class WeightDetector:
    """Detect a weight reading in an image using EasyOCR.

    ``detect_weight`` is the main entry point: it runs OCR on the image,
    parses every recognised text fragment for a number (optionally followed
    by a unit) and returns the most confident reading together with an
    annotated copy of the image. ``preprocess_image`` is a standalone helper
    and is not called by ``detect_weight``.
    """

    # Unit spellings accepted after a number, longest first so the whole
    # unit word is captured rather than just its first letter(s).
    _UNITS = r'(?P<unit>kilograms|grams|pounds|lbs|kg|lb|g)'
    # Optional sign, optional integer part and a '.' or ',' decimal mark.
    # The lookbehind stops a match from starting in the middle of a number
    # and keeps the hyphen of a range such as "3-5" from being read as a sign.
    _DECIMAL = r'(?<![\d.,])(?P<value>-?\d*[.,]\d+)'
    _INTEGER = r'(?<![\d.,])(?P<value>-?\d+)'
    # Tried in order: numbers with a unit win over bare numbers, and
    # decimals win over whole numbers.
    _WEIGHT_PATTERNS = (
        re.compile(_DECIMAL + r'\s*' + _UNITS, re.IGNORECASE),
        re.compile(_INTEGER + r'\s*' + _UNITS, re.IGNORECASE),
        re.compile(_DECIMAL),
        re.compile(_INTEGER),
    )
    # Unit shown in the annotation label when the text carried none.
    _DEFAULT_UNIT = "g"

    def __init__(self):
        """Initialize the OCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image from disk and binarise it for OCR.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The grayscale image after Gaussian adaptive thresholding.

        Raises:
            ValueError: If OpenCV cannot read an image from ``image_path``.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image from path")

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding
        processed = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        return processed

    def _parse_weight(self, text: str) -> Optional[Tuple[float, Optional[str]]]:
        """Return ``(value, unit)`` for the first weight-like number in text.

        ``unit`` is the lower-cased unit word, or ``None`` when the number
        was matched without one. Returns ``None`` when no number is found.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            try:
                # Accept a decimal comma (e.g. "68,0") as well as a dot.
                value = float(match.group("value").replace(',', '.'))
            except ValueError:
                continue
            unit = match.groupdict().get("unit")
            return value, (unit.lower() if unit else None)
        return None

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a weight value from text.

        Handles forms such as ``12.34g``, ``56 kg``, ``90.12 lbs``,
        ``68,0 kg``, ``.5 kg`` and ``-1.2 kg``, as well as bare numbers.

        Args:
            text: Text to search, typically a single OCR fragment.

        Returns:
            The parsed number, or ``None`` if the text contains no number.
        """
        parsed = self._parse_weight(text)
        return None if parsed is None else parsed[0]

    def detect_weight(self, image_path: str) -> "Tuple[Optional[float], List[dict], Image.Image]":
        """Detect weight from an image and return value, metadata, and annotated image.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A tuple ``(weight, detections, image)``:

            * ``weight`` - value of the most probable detection, or ``None``.
            * ``detections`` - one dict per text fragment containing a
              number, sorted by descending OCR probability, with keys
              ``weight``, ``unit``, ``text``, ``probability`` and ``bbox``.
            * ``image`` - RGB copy of the input with each detection outlined
              and labelled in red. When nothing is detected the unannotated
              image is returned; on any error a blank white 100x100 image is
              returned with ``None`` and an empty list.
        """
        try:
            # Load inside a context manager so the file handle is released
            # promptly; convert() yields an independent in-memory copy.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            # Perform OCR
            results = self.reader.readtext(img_cv)

            # Keep every fragment that contains a number
            detected_weights = []
            for bbox, text, prob in results:
                parsed = self._parse_weight(text)
                if parsed is None:
                    continue
                weight, unit = parsed
                detected_weights.append({
                    'weight': weight,
                    'unit': unit,
                    'text': text,
                    'probability': prob,
                    'bbox': bbox,
                })

            if not detected_weights:
                return None, [], img

            # Most probable detection first
            detected_weights.sort(key=lambda item: item['probability'], reverse=True)

            # Draw bounding boxes on image
            draw = ImageDraw.Draw(img)
            for item in detected_weights:
                polygon = [(int(x), int(y)) for x, y in item['bbox']]
                draw.polygon(polygon, outline="red", width=2)
                # Label with the unit actually read, not a hard-coded "g".
                unit_label = item['unit'] or self._DEFAULT_UNIT
                label = f"{item['weight']}{unit_label} (p={item['probability']:.2f})"
                # Clamp so labels for boxes at the top edge stay on the image.
                label_y = max(0, polygon[0][1] - 10)
                draw.text((polygon[0][0], label_y), label, fill="red")

            return detected_weights[0]['weight'], detected_weights, img

        except Exception as e:
            # Best effort: report the problem and hand back a placeholder.
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")