Update ocr_engine.py
ocr_engine.py  CHANGED  +105 -46

@@ -12,36 +12,78 @@ easyocr_reader = easyocr.Reader(['en'], gpu=False)

 def estimate_blur(img):
     """Estimate image blur using Laplacian variance"""
+    try:
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        return cv2.Laplacian(gray, cv2.CV_64F).var()
+    except Exception as e:
+        logging.error(f"Blur estimation failed: {str(e)}")
+        return 100  # Default value for fallback

-def enhance_image(img):
+def detect_roi(img):
+    """Detect and crop the region of interest (likely the digital display)"""
     try:
-        # Convert to grayscale
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Adaptive thresholding to handle varying lighting
+        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                       cv2.THRESH_BINARY_INV, 11, 2)
+        # Dilate to connect text regions
+        kernel = np.ones((5, 5), np.uint8)
+        dilated = cv2.dilate(thresh, kernel, iterations=1)
+        # Find contours
+        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        if contours:
+            # Get the largest contour with reasonable size
+            valid_contours = [c for c in contours if cv2.contourArea(c) > 1000]
+            if valid_contours:
+                largest_contour = max(valid_contours, key=cv2.contourArea)
+                x, y, w, h = cv2.boundingRect(largest_contour)
+                # Add padding and ensure bounds
+                x, y = max(0, x-20), max(0, y-20)
+                w, h = min(w+40, img.shape[1]-x), min(h+40, img.shape[0]-y)
+                if w > 50 and h > 30:  # Minimum size for valid ROI
+                    return img[y:y+h, x:x+w]
+        return img  # Fallback to original image
+    except Exception as e:
+        logging.error(f"ROI detection failed: {str(e)}")
+        return img

+def enhance_image(img, mode="standard"):
+    """Enhance image with different modes for multi-scale processing"""
+    try:
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+        if mode == "high_contrast":
+            # Stronger denoising and contrast for blurry images
+            denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
+            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
+        elif mode == "low_noise":
+            # Gentle denoising for clear but noisy images
+            denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
+            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
+        else:
+            # Standard preprocessing
+            denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
+            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

-        # CLAHE for contrast enhancement
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
         contrast = clahe.apply(denoised)

-        # Adaptive thresholding
+        # Adaptive thresholding
         thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY, 11, 2)

-        # Morphological operations
+        # Morphological operations
         kernel = np.ones((3, 3), np.uint8)
         morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)

+        # Adaptive sharpening
+        blur_score = estimate_blur(img)
+        sharpen_strength = 5 if blur_score < 100 else 3
+        sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
         sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)

         # Dynamic resizing
         h, w = sharpened.shape
-        target_size = 800
+        target_size = 800
         scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
         if scale_factor != 1.0:
             sharpened = cv2.resize(sharpened, None, fx=scale_factor, fy=scale_factor,
@@ -49,49 +91,66 @@ def enhance_image(img):

         return sharpened
     except Exception as e:
-        logging.error(f"Image enhancement failed: {str(e)}")
-        return img
+        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
+        return img

 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

-        # Estimate blur
+        # Estimate blur for adaptive thresholding
         blur_score = estimate_blur(img)
+        conf_threshold = 0.35 if blur_score < 100 else 0.55  # Slightly stricter thresholds

+        # Detect ROI
+        roi_img = detect_roi(img)
+
+        # Process multiple image versions
+        images_to_process = [
+            ("standard", enhance_image(roi_img, mode="standard"), {}),
+            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {}),
+            ("low_noise", enhance_image(roi_img, mode="low_noise"), {}),
+            ("original", roi_img, {'allowlist': '0123456789.'})  # Restrict to digits and decimal
+        ]

-        # Initialize results
         best_weight = None
         best_conf = 0.0
+        best_score = 0.0
+
+        for mode, proc_img, ocr_params in images_to_process:
+            # EasyOCR detection
+            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
+
+            for (bbox, text, conf) in results:
+                original_text = text
+                text = text.lower().strip()
+
+                # Fix common OCR errors
+                text = text.replace(",", ".").replace(";", ".")
+                text = text.replace("o", "0").replace("O", "0")
+                text = text.replace("s", "5").replace("S", "5")
+                text = text.replace("g", "9").replace("G", "6")
+                text = text.replace("l", "1").replace("I", "1")
+                text = text.replace("b", "8").replace("B", "8")
+                text = text.replace("z", "2").replace("Z", "2")
+                text = text.replace("q", "9").replace("Q", "9")
+                text = text.replace("kgs", "").replace("kg", "").replace("k", "")
+                text = re.sub(r"[^\d\.]", "", text)
+
+                # Regex for weight (0.0 to 9999.999)
+                if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
+                    try:
+                        weight = float(text)
+                        # Score based on realistic weight range (0.1–500 kg)
+                        range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
+                        score = conf * range_score
+                        if score > best_score and conf > conf_threshold:
+                            best_weight = text
+                            best_conf = conf
+                            best_score = score
+                    except ValueError:
+                        continue

         if not best_weight:
             logging.info("No valid weight detected")
@@ -103,7 +162,7 @@ def extract_weight_from_image(pil_img):
             int_part = int_part.lstrip("0") or "0"
             best_weight = f"{int_part}.{dec_part.rstrip('0')}"
         else:
+            best_weight = best_weight.lstrip('0') or "0"

         return best_weight, round(best_conf * 100, 2)
