import easyocr
import numpy as np
import cv2
import re
import logging

# Set up logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the EasyOCR reader (English only, CPU mode)
easyocr_reader = easyocr.Reader(['en'], gpu=False)
def estimate_brightness(img):
    """Estimate image brightness to detect illuminated displays."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return np.mean(gray)
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display)."""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Stricter threshold for brightly lit displays
        brightness = estimate_brightness(img)
        thresh_value = 230 if brightness > 100 else 190
        _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
        # Morphological dilation to connect digit segments into one blob
        kernel = np.ones((9, 9), np.uint8)
        dilated = cv2.dilate(thresh, kernel, iterations=3)
        # Find contours of the bright regions
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Filter contours by size, then check aspect ratio, largest first
            valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
            for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
                x, y, w, h = cv2.boundingRect(contour)
                aspect_ratio = w / h
                if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
                    # Pad the box while staying inside the image bounds
                    x, y = max(0, x - 40), max(0, y - 40)
                    w, h = min(w + 80, img.shape[1] - x), min(h + 80, img.shape[0] - y)
                    return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
def correct_seven_segment(text, bbox, img):
    """Correct common seven-segment misreads ("2" vs "6") using the pixel distribution inside the bounding box."""
    if "2" in text or "6" in text:
        # Bounding box corners from EasyOCR: top-left, top-right, bottom-right, bottom-left
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
        x_min, x_max = min(x1, x4), max(x2, x3)
        y_min, y_max = min(y1, y2), max(y3, y4)
        # Clamp bounds to the image
        x_min, y_min = max(0, int(x_min)), max(0, int(y_min))
        x_max, y_max = min(img.shape[1], int(x_max)), min(img.shape[0], int(y_max))
        if x_max <= x_min or y_max <= y_min:
            return text
        # Crop the digit area
        digit_area = img[y_min:y_max, x_min:x_max]
        if digit_area.size == 0:
            return text
        # Threshold the crop (accept either a grayscale or a BGR image)
        gray = digit_area if digit_area.ndim == 2 else cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # Compare lit-pixel mass in the upper vs. lower half
        h, w = thresh.shape
        upper_half = thresh[:h // 2, :]
        lower_half = thresh[h // 2:, :]
        upper_pixels = np.sum(upper_half == 255)
        lower_pixels = np.sum(lower_half == 255)
        # A "6" has more lit pixels in the lower half because of its loop; a "2" is more balanced
        if lower_pixels > upper_pixels * 1.5:
            text = text.replace("2", "6")
        else:
            text = text.replace("6", "2")
    return text
def enhance_image(img, mode="standard"):
    """Enhance the image with different modes for multi-scale processing."""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if mode == "seven_segment":
            # Minimal preprocessing for seven-segment displays: Otsu threshold only
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            return thresh
        elif mode == "minimal":
            # No blurring, just Otsu threshold
            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            return thresh
        elif mode == "raw":
            # No preprocessing beyond grayscale conversion
            return gray
        elif mode == "high_contrast":
            denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)
        elif mode == "low_noise":
            denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)
        else:  # "standard"
            denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
            thresh = clahe.apply(denoised)

        # The remaining steps only run for the standard/high_contrast/low_noise modes,
        # since the seven_segment, minimal, and raw modes return early above.
        thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)
        # Morphological closing to fill small gaps
        kernel = np.ones((3, 3), np.uint8)
        morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
        # Sharpen more aggressively on darker images
        brightness = estimate_brightness(img)
        sharpen_strength = 3 if brightness > 100 else 5
        sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
        morphed = cv2.filter2D(morphed, -1, sharpen_kernel)

        # Dynamic resizing: upscale small images (up to 2x), downscale images larger than 800 px
        h, w = morphed.shape
        target_size = 800
        scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
        if scale_factor != 1.0:
            morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
                                 interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
        return morphed
    except Exception as e:
        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
        return img
def extract_weight_from_image(pil_img):
    """Extract a weight reading from a PIL image of a scale display; returns (text, confidence %)."""
    try:
        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        # Estimate brightness to pick a confidence threshold (stricter for bright displays)
        brightness = estimate_brightness(img)
        conf_threshold = 0.8 if brightness > 100 else 0.6
        # Detect the region of interest (the display)
        roi_img = detect_roi(img)
        # Process multiple enhanced versions of the ROI with mode-specific OCR parameters
        images_to_process = [
            ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
            ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
            ("raw", enhance_image(roi_img, mode="raw"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
            ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
            ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
        ]
        best_weight = None
        best_conf = 0.0
        best_score = 0.0
        for mode, proc_img, ocr_params in images_to_process:
            # EasyOCR detection
            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
            for (bbox, text, conf) in results:
                # Apply the seven-segment correction on the image OCR actually read,
                # so the bounding box coordinates match even after resizing
                text = correct_seven_segment(text, bbox, proc_img)
                text = text.lower().strip()
                # Fix common OCR character confusions
                text = text.replace(",", ".").replace(";", ".")
                text = text.replace("o", "0").replace("O", "0")
                text = text.replace("s", "5").replace("S", "5")
                text = text.replace("g", "9").replace("G", "6")
                text = text.replace("l", "1").replace("I", "1")
                text = text.replace("b", "8").replace("B", "8")
                text = text.replace("z", "2").replace("Z", "2")
                text = text.replace("q", "9").replace("Q", "9")
                # Strip unit suffixes and any remaining non-numeric characters
                text = text.replace("kgs", "").replace("kg", "").replace("k", "")
                text = re.sub(r"[^\d\.]", "", text)
                # Accept only plausible weight strings (up to 4 integer and 3 decimal digits)
                if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                    try:
                        weight = float(text)
                        # Score based on a realistic weight range (0.1-500 kg)
                        range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
                        # Prefer two-digit weights, which are typical for scales
                        digit_score = 1.5 if 10 <= weight < 100 else 1.0
                        score = conf * range_score * digit_score
                        if score > best_score and conf > conf_threshold:
                            best_weight = text
                            best_conf = conf
                            best_score = score
                    except ValueError:
                        continue
        if not best_weight:
            logging.info("No valid weight detected")
            return "Not detected", 0.0
        # Format the output: strip leading zeros and trailing decimal zeros
        if "." in best_weight:
            int_part, dec_part = best_weight.split(".")
            int_part = int_part.lstrip("0") or "0"
            dec_part = dec_part.rstrip("0")
            best_weight = f"{int_part}.{dec_part}" if dec_part else int_part
        else:
            best_weight = best_weight.lstrip("0") or "0"
        return best_weight, round(best_conf * 100, 2)
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0