AutoWeightLogger1 / ocr_engine.py
Sanjayraju30's picture
Update ocr_engine.py
4c95d04 verified
raw
history blame
9.24 kB
import easyocr
import numpy as np
import cv2
import re
import logging
# Set up logging for debugging: configure the root logger at import time with
# INFO level and a timestamp / level / message format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize EasyOCR: one module-level reader (English, gpu=False) created at
# import time and reused by the OCR functions in this module.
easyocr_reader = easyocr.Reader(['en'], gpu=False)
def estimate_brightness(img):
    """Estimate image brightness to detect illuminated displays.

    Args:
        img: Image array. A 2-D array is treated as already grayscale and
            averaged directly; anything else is converted with
            ``cv2.COLOR_BGR2GRAY`` first.

    Returns:
        The mean gray level, as returned by ``np.mean``.
    """
    # A single-channel image has no colour axis to collapse, and the
    # BGR->gray conversion rejects it, so average it as-is.
    if np.ndim(img) == 2:
        return np.mean(img)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.mean(gray)
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display).

    The image is converted to grayscale and thresholded (230 when the mean
    brightness exceeds 100, otherwise 190), the bright mask is dilated with
    a 9x9 kernel for three iterations, and the external contours with area
    above 500 are examined from largest to smallest. The first contour whose
    bounding box has a width/height ratio between 1.5 and 4.0, a width over
    50 and a height over 30 is accepted.

    Args:
        img: Image array; converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        ``(crop, (x, y, w, h))`` where the box is the accepted bounding box
        grown by 40 pixels on every side and clipped to the image, or
        ``(img, None)`` when no contour qualifies or any step raises (the
        error is logged).
    """
    pad = 40
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        brightness = estimate_brightness(img)
        thresh_value = 230 if brightness > 100 else 190
        _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
        kernel = np.ones((9, 9), np.uint8)
        dilated = cv2.dilate(thresh, kernel, iterations=3)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        img_h, img_w = img.shape[0], img.shape[1]
        candidates = [c for c in contours if cv2.contourArea(c) > 500]
        for contour in sorted(candidates, key=cv2.contourArea, reverse=True):
            x, y, w, h = cv2.boundingRect(contour)
            aspect_ratio = w / h
            if not (1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30):
                continue
            # Clamp each corner independently. Growing the width by 2*pad
            # after the origin has been clipped to 0 would push the far
            # edge out by more than `pad` for boxes near the left/top edge.
            left, top = max(0, x - pad), max(0, y - pad)
            right, bottom = min(img_w, x + w + pad), min(img_h, y + h + pad)
            return img[top:bottom, left:right], (left, top, right - left, bottom - top)
        return img, None
    except Exception as e:
        # Best effort: fall back to the full frame rather than failing.
        logging.error(f"ROI detection failed: {str(e)}")
        return img, None
def detect_segments(digit_img):
    """Detect seven-segment patterns in a digit image.

    The image is split into seven fixed regions: three full-width
    horizontal bands (top, middle, bottom fifth-height strips) and four
    fifth-width vertical strips covering the upper and lower halves of the
    left and right edges. A region counts as lit when more than half of its
    pixels equal 255. The lit set is then compared with the ten digit
    patterns.

    Scoring: a pattern earns one point for every lit segment it expects and
    loses one for every lit segment it does not expect. Ties are broken in
    favour of the pattern with the fewest expected-but-unlit segments, so an
    exact match always beats a pattern that merely contains the lit set.
    Without the tie-break a lit "1" or "7" is reported as "0" and a lit "9"
    as "8", because the superset pattern is visited first.

    Args:
        digit_img: 2-D array; pixels equal to 255 are treated as lit.

    Returns:
        The best-matching digit as a one-character string, or ``None`` when
        the image is smaller than 10x10 or no pattern scores above zero.
    """
    h, w = digit_img.shape
    if h < 10 or w < 10:
        return None

    # Region bounds as (x1, x2, y1, y2).
    segments = {
        'top': (0, w, 0, h // 5),
        'middle': (0, w, 2 * h // 5, 3 * h // 5),
        'bottom': (0, w, 4 * h // 5, h),
        'left_top': (0, w // 5, 0, h // 2),
        'left_bottom': (0, w // 5, h // 2, h),
        'right_top': (4 * w // 5, w, 0, h // 2),
        'right_bottom': (4 * w // 5, w, h // 2, h),
    }

    lit = set()
    for name, (x1, x2, y1, y2) in segments.items():
        region = digit_img[y1:y2, x1:x2]
        if region.size == 0:
            return None
        # Segment is present if more than 50% of the region is white.
        if np.count_nonzero(region == 255) > region.size * 0.5:
            lit.add(name)

    # Seven-segment digit patterns.
    digit_patterns = {
        '0': frozenset(('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom')),
        '1': frozenset(('right_top', 'right_bottom')),
        '2': frozenset(('top', 'middle', 'bottom', 'left_bottom', 'right_top')),
        '3': frozenset(('top', 'middle', 'bottom', 'right_top', 'right_bottom')),
        '4': frozenset(('middle', 'left_top', 'right_top', 'right_bottom')),
        '5': frozenset(('top', 'middle', 'bottom', 'left_top', 'right_bottom')),
        '6': frozenset(('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom')),
        '7': frozenset(('top', 'right_top', 'right_bottom')),
        '8': frozenset(('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom')),
        '9': frozenset(('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom')),
    }

    best_match = None
    best_score = 0
    best_missing = 0
    for digit, pattern in digit_patterns.items():
        score = len(lit & pattern) - len(lit - pattern)
        missing = len(pattern - lit)
        improves = score > best_score
        # Equal score: prefer the pattern that leaves fewer of its own
        # segments unlit (only once some pattern has already qualified).
        tie_break = best_match is not None and score == best_score and missing < best_missing
        if improves or tie_break:
            best_match, best_score, best_missing = digit, score, missing
    return best_match
def custom_seven_segment_ocr(img, roi_bbox):
    """Read a seven-segment display by decoding each EasyOCR text box.

    The image is binarised with Otsu's threshold, EasyOCR supplies the
    bounding boxes, and every box (ordered left to right) is cropped from
    the binary image and handed to ``detect_segments``. The recognised
    digits are concatenated in that order.

    Args:
        img: Image array; converted with ``cv2.COLOR_BGR2GRAY``.
        roi_bbox: Accepted for the caller's convenience; not used here.

    Returns:
        The digit string when it has one to four integer digits and an
        optional fraction of up to three digits, otherwise ``None``. Any
        exception is logged and also results in ``None``.
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        # EasyOCR is used only for its bounding boxes; its text is ignored.
        detections = easyocr_reader.readtext(
            binary,
            detail=1,
            paragraph=False,
            contrast_ths=0.1,
            adjust_contrast=0.7,
            text_threshold=0.9,
            mag_ratio=1.5,
            allowlist='0123456789.',
        )
        if not detections:
            return None

        # Collapse each four-point box to (x_min, x_max, y_min, y_max) and
        # order the boxes by their left edge.
        boxes = sorted(
            (
                (min(x1, x4), max(x2, x3), min(y1, y2), max(y3, y4))
                for ((x1, y1), (x2, y2), (x3, y3), (x4, y4)), _, _ in detections
            ),
            key=lambda box: box[0],
        )

        height, width = binary.shape[0], binary.shape[1]
        pieces = []
        for left, right, top, bottom in boxes:
            left, top = max(0, int(left)), max(0, int(top))
            right, bottom = min(width, int(right)), min(height, int(bottom))
            # Skip boxes that collapse to nothing once clipped to the image.
            if right > left and bottom > top:
                digit = detect_segments(binary[top:bottom, left:right])
                if digit:
                    pieces.append(digit)

        # Keep only digits and dots, then require a plausible number shape.
        candidate = re.sub(r"[^\d\.]", "", "".join(pieces))
        return candidate if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", candidate) else None
    except Exception as e:
        logging.error(f"Custom seven-segment OCR failed: {str(e)}")
        return None
# Letters that OCR commonly confuses with digits, mapped case-sensitively.
_OCR_LOOKALIKES = str.maketrans({
    "o": "0", "O": "0",
    "s": "5", "S": "5",
    "g": "9", "G": "6",
    "l": "1", "I": "1",
    "b": "8", "B": "8",
    "z": "2", "Z": "2",
    "q": "9", "Q": "9",
})


def _normalize_ocr_text(text):
    """Reduce a raw OCR token to a string of digits and dots."""
    text = text.strip()
    # Remove the unit suffix first: once "g"/"s" have been turned into
    # digits, "kg"/"kgs" can no longer be recognised and would leak into
    # the number (e.g. "75kg" -> "759").
    text = re.sub(r"kgs?", "", text, flags=re.IGNORECASE)
    text = text.replace(",", ".").replace(";", ".")
    # Applied before any case folding so the upper-case-only mappings
    # (G -> 6, I -> 1) actually take effect.
    text = text.translate(_OCR_LOOKALIKES)
    return re.sub(r"[^\d\.]", "", text)


def _format_weight(text):
    """Strip leading integer zeros and trailing fraction zeros from a number string."""
    int_part, _, dec_part = text.partition(".")
    int_part = int_part.lstrip("0") or "0"
    dec_part = dec_part.rstrip("0")
    # Omit the dot when nothing is left after it ("12.0" -> "12", not "12.").
    return f"{int_part}.{dec_part}" if dec_part else int_part


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image of a scale display.

    The image is converted to an array and reordered with
    ``cv2.COLOR_RGB2BGR``, cropped with ``detect_roi``, and read first with
    ``custom_seven_segment_ocr``. If that yields nothing, EasyOCR is run on
    the grayscale crop and the best-scoring numeric token is chosen. A
    token's score is its confidence, multiplied by 1.0 when the value lies
    in [0.1, 500] (0.3 otherwise) and by 1.5 when it lies in [10, 100)
    (1.0 otherwise). Tokens whose confidence does not exceed the threshold
    (0.9 when mean brightness is above 100, else 0.7) are ignored.

    Args:
        pil_img: Image object accepted by ``np.array``.

    Returns:
        ``(weight, confidence)`` where ``weight`` is the formatted number
        string and ``confidence`` is a percentage (100.0 for the
        seven-segment path, otherwise the EasyOCR confidence times 100
        rounded to two decimals), or ``("Not detected", 0.0)`` when nothing
        valid is found or any step raises (the error is logged).
    """
    try:
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        brightness = estimate_brightness(img)
        conf_threshold = 0.9 if brightness > 100 else 0.7

        # Detect ROI
        roi_img, roi_bbox = detect_roi(img)

        # Try custom seven-segment OCR first
        custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
        if custom_result:
            return _format_weight(custom_result), 100.0  # High confidence for custom OCR

        # Fallback to EasyOCR if custom OCR fails
        gray_roi = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
        results = easyocr_reader.readtext(
            gray_roi,
            detail=1,
            paragraph=False,
            contrast_ths=0.1,
            adjust_contrast=0.7,
            text_threshold=0.9,
            mag_ratio=1.5,
            allowlist='0123456789.',
        )

        best_weight = None
        best_conf = 0.0
        best_score = 0.0
        for _bbox, raw_text, conf in results:
            text = _normalize_ocr_text(raw_text)
            if not re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                continue
            weight = float(text)
            range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
            digit_score = 1.5 if 10 <= weight < 100 else 1.0
            score = conf * range_score * digit_score
            if score > best_score and conf > conf_threshold:
                best_weight = text
                best_conf = conf
                best_score = score

        if best_weight is None:
            logging.info("No valid weight detected")
            return "Not detected", 0.0
        return _format_weight(best_weight), round(best_conf * 100, 2)
    except Exception as e:
        # Best effort: callers always get a (text, confidence) pair.
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0