# AutoWeightLogger1 / ocr_engine.py
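"""OCR engine for AutoWeightLogger: reads a numeric weight value from a photo of a
digital scale display using EasyOCR with OpenCV-based preprocessing."""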
import easyocr
import numpy as np
import cv2
import re
import logging

# Set up logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize EasyOCR (English only, CPU mode)
easyocr_reader = easyocr.Reader(['en'], gpu=False)

def estimate_brightness(img):
    """Estimate image brightness to detect illuminated displays"""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.mean(gray)

def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display)"""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Stricter threshold for bright areas
        brightness = estimate_brightness(img)
        thresh_value = 220 if brightness > 100 else 180
        _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
        # Morphological operations to connect digits
        kernel = np.ones((9, 9), np.uint8)
        dilated = cv2.dilate(thresh, kernel, iterations=3)
        # Find contours
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Filter contours by size and aspect ratio (typical for displays)
            valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
            if valid_contours:
                for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
                    x, y, w, h = cv2.boundingRect(contour)
                    aspect_ratio = w / h
                    if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:  # Typical display aspect ratio
                        # Expand the box with a margin, clamped to the image bounds
                        x, y = max(0, x - 40), max(0, y - 40)
                        w, h = min(w + 80, img.shape[1] - x), min(h + 80, img.shape[0] - y)
                        return img[y:y+h, x:x+w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img

def correct_seven_segment(text, bbox):
    """Correct common seven-segment misreads based on bounding box shape"""
    if "6" in text:
        # Check bounding box aspect ratio to differentiate "6" from "2".
        # EasyOCR returns corners in order: top-left, top-right, bottom-right, bottom-left,
        # so height is measured between the top-left and bottom-left corners.
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
        width = abs(x2 - x1)
        height = abs(y4 - y1)
        aspect_ratio = width / height if height > 0 else 1.0
        # "2" typically has a more rectangular shape in seven-segment
        if aspect_ratio > 0.5:  # Adjust based on typical "2" vs "6" shapes
            text = text.replace("6", "2")
    return text

def enhance_image(img, mode="standard"):
    """Enhance image with different modes for multi-scale processing"""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if mode == "seven_segment":
            # Minimal preprocessing for seven-segment displays
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            return thresh
        elif mode == "minimal":
            # Very light preprocessing
            denoised = cv2.GaussianBlur(gray, (3, 3), 0)
            _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            return thresh
        elif mode == "high_contrast":
            denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)
        elif mode == "low_noise":
            denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)
        else:
            denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)

        if mode not in ["seven_segment", "minimal"]:
            thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, 11, 2)

        # Morphological operations
        kernel = np.ones((3, 3), np.uint8)
        morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)

        # Skip sharpening for seven-segment and minimal modes
        if mode not in ["seven_segment", "minimal"]:
            brightness = estimate_brightness(img)
            sharpen_strength = 3 if brightness > 100 else 5
            sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
            morphed = cv2.filter2D(morphed, -1, sharpen_kernel)

        # Dynamic resizing
        h, w = morphed.shape
        target_size = 800
        scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
        if scale_factor != 1.0:
            morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
                                 interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)

        return morphed
    except Exception as e:
        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
        return img

def extract_weight_from_image(pil_img):
    """Extract a weight reading and confidence (%) from a PIL image of a scale display."""
    try:
        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # Estimate brightness for adaptive thresholding
        brightness = estimate_brightness(img)
        conf_threshold = 0.7 if brightness > 100 else 0.5  # Stricter for bright displays

        # Detect ROI
        roi_img = detect_roi(img)

        # Process multiple image versions
        images_to_process = [
            ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
            ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
            ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
            ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
            ("original", roi_img, {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'})
        ]

        best_weight = None
        best_conf = 0.0
        best_score = 0.0

        for mode, proc_img, ocr_params in images_to_process:
            # EasyOCR detection
            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
            for (bbox, text, conf) in results:
                # Apply seven-segment correction
                text = correct_seven_segment(text, bbox)
                text = text.lower().strip()
                # Fix common OCR errors
                text = text.replace(",", ".").replace(";", ".")
                text = text.replace("o", "0").replace("O", "0")
                text = text.replace("s", "5").replace("S", "5")
                text = text.replace("g", "9").replace("G", "6")
                text = text.replace("l", "1").replace("I", "1")
                text = text.replace("b", "8").replace("B", "8")
                text = text.replace("z", "2").replace("Z", "2")
                text = text.replace("q", "9").replace("Q", "9")
                text = text.replace("kgs", "").replace("kg", "").replace("k", "")
                text = re.sub(r"[^\d\.]", "", text)
                # Regex for weight (0.0 to 9999.999)
                if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                    try:
                        weight = float(text)
                        # Score based on realistic weight range (0.1–500 kg)
                        range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
                        # Strongly prefer two-digit weights for scales
                        digit_score = 1.5 if 10 <= weight < 100 else 1.0
                        score = conf * range_score * digit_score
                        if score > best_score and conf > conf_threshold:
                            best_weight = text
                            best_conf = conf
                            best_score = score
                    except ValueError:
                        continue
        if not best_weight:
            logging.info("No valid weight detected")
            return "Not detected", 0.0

        # Format output: strip leading zeros and trailing decimal zeros
        if "." in best_weight:
            int_part, dec_part = best_weight.split(".")
            int_part = int_part.lstrip("0") or "0"
            dec_part = dec_part.rstrip("0")
            best_weight = f"{int_part}.{dec_part}" if dec_part else int_part
        else:
            best_weight = best_weight.lstrip("0") or "0"

        return best_weight, round(best_conf * 100, 2)
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0