Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +43 -40
ocr_engine.py
CHANGED
@@ -10,39 +10,36 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
10 |
# Initialize EasyOCR
|
11 |
easyocr_reader = easyocr.Reader(['en'], gpu=False)
|
12 |
|
13 |
-
def
|
14 |
-
"""Estimate image
|
15 |
-
|
16 |
-
|
17 |
-
return cv2.Laplacian(gray, cv2.CV_64F).var()
|
18 |
-
except Exception as e:
|
19 |
-
logging.error(f"Blur estimation failed: {str(e)}")
|
20 |
-
return 100 # Default value for fallback
|
21 |
|
22 |
def detect_roi(img):
|
23 |
"""Detect and crop the region of interest (likely the digital display)"""
|
24 |
try:
|
25 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
32 |
# Find contours
|
33 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
34 |
if contours:
|
35 |
# Get the largest contour with reasonable size
|
36 |
-
valid_contours = [c for c in contours if cv2.contourArea(c) >
|
37 |
if valid_contours:
|
38 |
largest_contour = max(valid_contours, key=cv2.contourArea)
|
39 |
x, y, w, h = cv2.boundingRect(largest_contour)
|
40 |
-
# Add padding and ensure bounds
|
41 |
-
x, y = max(0, x-
|
42 |
-
w, h = min(w+
|
43 |
-
if w > 50 and h > 30:
|
44 |
return img[y:y+h, x:x+w]
|
45 |
-
return img
|
46 |
except Exception as e:
|
47 |
logging.error(f"ROI detection failed: {str(e)}")
|
48 |
return img
|
@@ -52,32 +49,34 @@ def enhance_image(img, mode="standard"):
|
|
52 |
try:
|
53 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
54 |
|
55 |
-
if mode == "
|
56 |
-
#
|
|
|
|
|
|
|
57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
|
|
59 |
elif mode == "low_noise":
|
60 |
-
# Gentle denoising for clear but noisy images
|
61 |
denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
|
62 |
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
|
|
|
63 |
else:
|
64 |
-
# Standard preprocessing
|
65 |
denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
|
66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
|
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
72 |
-
cv2.THRESH_BINARY, 11, 2)
|
73 |
|
74 |
# Morphological operations
|
75 |
kernel = np.ones((3, 3), np.uint8)
|
76 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
77 |
|
78 |
-
#
|
79 |
-
|
80 |
-
sharpen_strength =
|
81 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
82 |
sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
|
83 |
|
@@ -99,19 +98,20 @@ def extract_weight_from_image(pil_img):
|
|
99 |
img = np.array(pil_img)
|
100 |
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
101 |
|
102 |
-
# Estimate
|
103 |
-
|
104 |
-
conf_threshold = 0.
|
105 |
|
106 |
# Detect ROI
|
107 |
roi_img = detect_roi(img)
|
108 |
|
109 |
# Process multiple image versions
|
110 |
images_to_process = [
|
111 |
-
("
|
112 |
-
("
|
113 |
-
("
|
114 |
-
("
|
|
|
115 |
]
|
116 |
|
117 |
best_weight = None
|
@@ -135,6 +135,7 @@ def extract_weight_from_image(pil_img):
|
|
135 |
text = text.replace("b", "8").replace("B", "8")
|
136 |
text = text.replace("z", "2").replace("Z", "2")
|
137 |
text = text.replace("q", "9").replace("Q", "9")
|
|
|
138 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
139 |
text = re.sub(r"[^\d\.]", "", text)
|
140 |
|
@@ -144,7 +145,9 @@ def extract_weight_from_image(pil_img):
|
|
144 |
weight = float(text)
|
145 |
# Score based on realistic weight range (0.1–500 kg)
|
146 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
147 |
-
|
|
|
|
|
148 |
if score > best_score and conf > conf_threshold:
|
149 |
best_weight = text
|
150 |
best_conf = conf
|
|
|
10 |
# Initialize EasyOCR
|
11 |
easyocr_reader = easyocr.Reader(['en'], gpu=False)
|
12 |
|
13 |
+
def estimate_brightness(img):
    """Return the mean grayscale intensity of a BGR image.

    Used as a cheap proxy for whether the frame contains an illuminated
    (backlit/LED) display, which downstream steps threshold against.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.mean(grayscale)
|
|
|
|
|
|
|
|
|
17 |
|
18 |
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display).

    Strategy: threshold the bright pixels (illuminated LCD/LED segments),
    dilate them so digit segments merge into one blob, then crop the largest
    sufficiently large contour with padding. Returns the full image unchanged
    when no plausible region is found or on any error (best-effort fallback).

    Args:
        img: BGR image as a numpy array (H, W, 3).

    Returns:
        A cropped BGR sub-image around the display, or ``img`` itself.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Brightness drives the threshold choice. The gray plane is already
        # available here, so compute the mean directly instead of calling
        # estimate_brightness(img), which would redo the BGR->gray conversion.
        brightness = np.mean(gray)
        thresh_value = 200 if brightness > 100 else 150  # stricter on bright frames
        _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)

        # Dilate aggressively so separate digit segments connect into one blob.
        kernel = np.ones((7, 7), np.uint8)
        dilated = cv2.dilate(thresh, kernel, iterations=2)

        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Discard specks: only contours large enough to be a display count.
            valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
            if valid_contours:
                largest_contour = max(valid_contours, key=cv2.contourArea)
                x, y, w, h = cv2.boundingRect(largest_contour)
                # Pad 30 px on each side, clamped to the image bounds.
                x, y = max(0, x - 30), max(0, y - 30)
                w, h = min(w + 60, img.shape[1] - x), min(h + 60, img.shape[0] - y)
                if w > 50 and h > 30:  # reject implausibly small crops
                    return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        # Never let ROI detection take down the OCR pipeline; log and fall back.
        logging.error(f"ROI detection failed: {str(e)}")
        return img
|
|
|
49 |
try:
|
50 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
51 |
|
52 |
+
if mode == "seven_segment":
|
53 |
+
# Gentle preprocessing for seven-segment displays
|
54 |
+
denoised = cv2.GaussianBlur(gray, (5, 5), 0)
|
55 |
+
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
56 |
+
elif mode == "high_contrast":
|
57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
59 |
+
thresh = clahe.apply(denoised)
|
60 |
elif mode == "low_noise":
|
|
|
61 |
denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
|
62 |
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
|
63 |
+
thresh = clahe.apply(denoised)
|
64 |
else:
|
|
|
65 |
denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
|
66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
67 |
+
thresh = clahe.apply(denoised)
|
68 |
|
69 |
+
if mode != "seven_segment":
|
70 |
+
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
71 |
+
cv2.THRESH_BINARY, 11, 2)
|
|
|
|
|
72 |
|
73 |
# Morphological operations
|
74 |
kernel = np.ones((3, 3), np.uint8)
|
75 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
76 |
|
77 |
+
# Reduced sharpening for seven-segment displays
|
78 |
+
brightness = estimate_brightness(img)
|
79 |
+
sharpen_strength = 3 if mode == "seven_segment" or brightness > 100 else 5
|
80 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
81 |
sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
|
82 |
|
|
|
98 |
img = np.array(pil_img)
|
99 |
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
100 |
|
101 |
+
# Estimate brightness for adaptive thresholding
|
102 |
+
brightness = estimate_brightness(img)
|
103 |
+
conf_threshold = 0.5 if brightness > 100 else 0.4 # Stricter for bright displays
|
104 |
|
105 |
# Detect ROI
|
106 |
roi_img = detect_roi(img)
|
107 |
|
108 |
# Process multiple image versions
|
109 |
images_to_process = [
|
110 |
+
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.3, 'allowlist': '0123456789.'}),
|
111 |
+
("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1}),
|
112 |
+
("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1}),
|
113 |
+
("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1}),
|
114 |
+
("original", roi_img, {'contrast_ths': 0.3, 'allowlist': '0123456789.'})
|
115 |
]
|
116 |
|
117 |
best_weight = None
|
|
|
135 |
text = text.replace("b", "8").replace("B", "8")
|
136 |
text = text.replace("z", "2").replace("Z", "2")
|
137 |
text = text.replace("q", "9").replace("Q", "9")
|
138 |
+
text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
|
139 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
140 |
text = re.sub(r"[^\d\.]", "", text)
|
141 |
|
|
|
145 |
weight = float(text)
|
146 |
# Score based on realistic weight range (0.1–500 kg)
|
147 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
148 |
+
# Prefer two-digit weights for scales
|
149 |
+
digit_score = 1.1 if 10 <= weight < 100 else 1.0
|
150 |
+
score = conf * range_score * digit_score
|
151 |
if score > best_score and conf > conf_threshold:
|
152 |
best_weight = text
|
153 |
best_conf = conf
|