Spaces:
Running
Running
import easyocr | |
import numpy as np | |
import cv2 | |
import re | |
import logging | |
from datetime import datetime | |
import os | |
from PIL import Image, ImageEnhance | |
import pytesseract | |
# Verbose, timestamped logging so every stage of the OCR pipeline can be traced.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# One shared EasyOCR reader for English text; it is created with gpu=False (CPU).
easyocr_reader = easyocr.Reader(['en'], gpu=False)

# Folder that receives the intermediate images written by save_debug_image().
DEBUG_DIR = "debug_images"
os.makedirs(DEBUG_DIR, exist_ok=True)
def save_debug_image(img, filename_suffix, prefix=""):
    """Write an image to the debug directory as a timestamped PNG.

    Args:
        img: Image array handed straight to ``cv2.imwrite``.
        filename_suffix: Stage label placed after the timestamp.
        prefix: Optional text placed before the timestamp.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
    # The same imwrite call serves colour and grayscale arrays, so there is no
    # need to branch on the number of dimensions. imwrite reports failure
    # through its boolean return value, so only claim success when it is true.
    if cv2.imwrite(filename, img):
        logging.debug(f"Saved debug image: {filename}")
    else:
        logging.warning(f"Could not save debug image: {filename}")
def estimate_brightness(img):
    """Return the mean grayscale intensity of a BGR image.

    The value is logged at DEBUG level before being returned.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    mean_level = grayscale.mean()
    logging.debug(f"Estimated brightness: {mean_level}")
    return mean_level
def deblur_image(img, passes=2):
    """Sharpen an image by repeatedly applying a 3x3 sharpening kernel.

    Args:
        img: BGR image; it is converted to grayscale before filtering.
        passes: How many times the kernel is applied. Defaults to 2, the
            number of passes this function has always performed.

    Returns:
        The sharpened grayscale image as a ``uint8`` array. A copy is also
        written to the debug directory under the ``00_deblurred`` label.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # The kernel never changes between passes, so build it once up front.
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    for _ in range(passes):
        gray = cv2.filter2D(gray, -1, kernel)
        # Keep intermediate results in the 0-255 uint8 range.
        gray = np.clip(gray, 0, 255).astype(np.uint8)
    save_debug_image(gray, "00_deblurred")
    return gray
def preprocess_image(img):
    """Run the enhancement chain that prepares a BGR image for digit OCR.

    Stages, in order: PIL contrast and brightness boost, sharpening via
    ``deblur_image``, CLAHE, bilateral filtering, and a morphological opening.
    Each stage writes its output to the debug directory.

    Returns:
        The single-channel image produced by the final opening step.
    """
    # Stage 1: strong contrast, then brightness, applied through PIL.
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    boosted = ImageEnhance.Contrast(Image.fromarray(rgb)).enhance(3.0)
    boosted = ImageEnhance.Brightness(boosted).enhance(1.8)
    bgr_boosted = cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
    save_debug_image(bgr_boosted, "00_preprocessed_pil")

    # Stage 2: sharpen (this also converts to grayscale).
    sharpened = deblur_image(bgr_boosted)

    # Stage 3: local contrast equalisation.
    equalizer = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(sharpened)
    save_debug_image(equalized, "00_clahe_enhanced")

    # Stage 4: edge-preserving smoothing.
    smoothed = cv2.bilateralFilter(equalized, d=17, sigmaColor=200, sigmaSpace=200)
    save_debug_image(smoothed, "00_bilateral_filtered")

    # Stage 5: morphological opening to remove small specks.
    structuring = np.ones((5, 5), np.uint8)
    opened = cv2.morphologyEx(smoothed, cv2.MORPH_OPEN, structuring, iterations=2)
    save_debug_image(opened, "00_morph_cleaned")
    return opened
def normalize_image(img):
    """Rescale an image to a height of 1080 px, keeping its aspect ratio.

    If that would make the image narrower than 480 px, the width is pinned to
    480 px instead and the height is recomputed from the aspect ratio.

    Returns:
        The resized image (bicubic interpolation). A copy is written to the
        debug directory under the ``00_normalized`` label.
    """
    height, width = img.shape[:2]
    ratio = width / height

    new_height = 1080  # tall enough that small digits stay legible
    new_width = int(new_height * ratio)
    if new_width < 480:
        new_width = 480
        new_height = int(new_width / ratio)

    scaled = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
    save_debug_image(scaled, "00_normalized")
    logging.debug(f"Normalized image to {new_width}x{new_height}")
    return scaled
def tesseract_ocr(img):
    """Read digits from an image with Tesseract (used as the fallback OCR).

    Returns:
        The stripped text Tesseract produced, or ``None`` if the call raised.
    """
    # Restrict recognition to digits, the decimal point and the minus sign.
    digits_only = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.-'
    try:
        raw = pytesseract.image_to_string(img, config=digits_only)
        text = raw.strip()
        logging.info(f"Tesseract OCR raw text: {text}")
    except Exception as e:
        logging.error(f"Tesseract OCR failed: {str(e)}")
        return None
    return text
# Look-alike characters mapped to the digit they most resemble. The mapping is
# case-sensitive on purpose: "g" reads as 9 while "G" reads as 6.
_OCR_CHAR_FIXES = str.maketrans({
    ",": ".", ";": ".", ":": ".", " ": None,
    "o": "0", "O": "0", "q": "0", "Q": "0",
    "s": "5", "S": "5",
    "g": "9", "G": "6",
    "l": "1", "L": "1", "I": "1", "|": "1",
    "b": "8", "B": "8",
    "z": "2", "Z": "2",
    "a": "4", "A": "4",
    "e": "3", "E": "3",
    "t": "7", "T": "7",
})

# Unit labels that may trail the number on the display.
_UNIT_SUFFIX_RE = re.compile(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", re.IGNORECASE)


def _binarize_for_ocr(processed_img, brightness):
    """Threshold the preprocessed image and close small gaps in the strokes."""
    if brightness > 100:
        thresh = cv2.adaptiveThreshold(processed_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 61, 11)
        save_debug_image(thresh, "02_adaptive_threshold")
    else:
        _, thresh = cv2.threshold(processed_img, 10, 255, cv2.THRESH_BINARY_INV)
        save_debug_image(thresh, "02_simple_threshold")
    kernel = np.ones((7, 7), np.uint8)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
    save_debug_image(thresh, "02_morph_cleaned")
    return thresh


def _read_digits(thresh, conf_threshold=0.1):
    """Run EasyOCR on the binarized image, falling back to Tesseract if empty."""
    results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
                                      contrast_ths=0.05, adjust_contrast=1.5,
                                      text_threshold=0.05, mag_ratio=10.0,
                                      allowlist='0123456789.-', y_ths=0.8)
    logging.info(f"EasyOCR results: {results}")
    fragments = []
    if results:
        # Order detections by the x of their first box corner (left to right).
        for _, text, conf in sorted(results, key=lambda r: r[0][0][0]):
            logging.info(f"EasyOCR detected: {text}, Confidence: {conf}")
            if conf > conf_threshold and any(c in '0123456789.-' for c in text):
                fragments.append(text)
    else:
        logging.info("EasyOCR found no digits.")
    recognized = "".join(fragments)
    if not recognized:
        fallback = tesseract_ocr(thresh)
        if fallback:
            recognized = fallback
            logging.info(f"Using Tesseract result: {recognized}")
    return recognized


def _clean_weight_text(raw):
    """Reduce raw OCR text to a candidate numeric string (digits, '.', '-')."""
    # Units must go first: once "g" has been rewritten to "9", "kg" no longer
    # matches and would leak a spurious digit into the weight.
    text = _UNIT_SUFFIX_RE.sub("", raw.strip())
    text = text.translate(_OCR_CHAR_FIXES)
    text = re.sub(r"[^\d.\-]", "", text)
    if text.count(".") > 1:
        # Keep the first decimal point and merge whatever follows it.
        head, _, tail = text.partition(".")
        text = f"{head}.{tail.replace('.', '')}"
    text = text.rstrip(".")
    # A leading decimal point is significant (".5" means 0.5), so pad it with
    # a zero rather than stripping it.
    if text.startswith("."):
        text = "0" + text
    elif text.startswith("-."):
        text = "-0" + text[1:]
    return text


def _format_weight(text):
    """Drop redundant leading and trailing zeros from a validated number string."""
    sign = "-" if text.startswith("-") else ""
    int_part, _, dec_part = text[len(sign):].partition(".")
    int_part = int_part.lstrip("0") or "0"
    dec_part = dec_part.rstrip("0")
    magnitude = f"{int_part}.{dec_part}" if dec_part else int_part
    # Avoid reporting "-0".
    return magnitude if magnitude == "0" else sign + magnitude


def extract_weight_from_image(pil_img):
    """Extract the weight shown in an image.

    The image is normalized, preprocessed and binarized, then read with
    EasyOCR (Tesseract is the fallback). The recognized text is cleaned into a
    number and returned in a canonical form without redundant zeros.

    Args:
        pil_img: The input image. PIL images are converted to RGB first; any
            other value is passed to ``np.array`` as-is and is expected to be
            in RGB channel order.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is the weight as a string and
        ``confidence`` is 80.0, halved when the value lies outside
        [-1000, 2000]. On any failure the result is ``("Not detected", 0.0)``;
        this function does not raise.
    """
    try:
        # Grayscale or palette PIL images would otherwise fail the RGB->BGR
        # conversion below and be reported as "Not detected".
        if isinstance(pil_img, Image.Image):
            pil_img = pil_img.convert("RGB")
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        save_debug_image(img, "00_input_image")

        img = normalize_image(img)
        brightness = estimate_brightness(img)

        # The whole frame is processed; no region-of-interest detection.
        processed_img = preprocess_image(img)
        save_debug_image(processed_img, "01_processed_full")

        thresh = _binarize_for_ocr(processed_img, brightness)
        recognized_text = _read_digits(thresh)
        logging.info(f"Raw recognized text: {recognized_text}")
        if not recognized_text:
            logging.info("No text detected by EasyOCR or Tesseract.")
            return "Not detected", 0.0

        text = _clean_weight_text(recognized_text)
        logging.info(f"Cleaned text: {text}")
        if text in ("", "-"):
            logging.warning("Cleaned text is invalid.")
            return "Not detected", 0.0

        try:
            weight = float(text)
        except ValueError:
            logging.warning(f"Could not convert '{text}' to float.")
            return "Not detected", 0.0

        confidence = 80.0
        if weight < -1000 or weight > 2000:
            logging.warning(f"Weight {weight} outside typical range, reducing confidence.")
            confidence *= 0.5

        text = _format_weight(text)
        logging.info(f"Final detected weight: {text}, Confidence: {confidence}%")
        return text, confidence
    except Exception as e:
        # Top-level boundary: callers always receive the sentinel result.
        logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
        return "Not detected", 0.0