import logging
import os
import re
from datetime import datetime

import cv2
import easyocr
import numpy as np
import pytesseract
from PIL import Image, ImageEnhance

# Set up logging for detailed debugging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize EasyOCR (enable GPU if available)
easyocr_reader = easyocr.Reader(['en'], gpu=False)

# Directory for debug images
DEBUG_DIR = "debug_images"
os.makedirs(DEBUG_DIR, exist_ok=True)

# Characters the OCR engines are allowed to emit / that count as "digit-like".
_OCR_CHARSET = "0123456789.-"

# Unit suffixes removed from the recognized text (matched on lower-cased text).
_UNIT_RE = re.compile(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b")

# Punctuation commonly misread in place of a decimal point; spaces are dropped.
_SEPARATOR_TABLE = str.maketrans({",": ".", ";": ".", ":": ".", " ": None})

# Letter/symbol look-alikes mapped to digits. The text is lower-cased before
# this table is applied, so only lower-case keys are needed.
_LOOKALIKE_TABLE = str.maketrans({
    "o": "0", "q": "0",
    "s": "5",
    "g": "9",
    "l": "1", "|": "1",
    "b": "8",
    "z": "2",
    "a": "4",
    "e": "3",
    "t": "7",
})


def save_debug_image(img, filename_suffix, prefix=""):
    """Save an image to the debug directory with a timestamped file name.

    Args:
        img: Image array accepted by ``cv2.imwrite`` (color or grayscale).
        filename_suffix: Text appended after the timestamp in the file name.
        prefix: Optional text placed before the timestamp.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
    # cv2.imwrite handles color and grayscale arrays alike; it returns False
    # when the file could not be written, so only report success when it did.
    if cv2.imwrite(filename, img):
        logging.debug("Saved debug image: %s", filename)
    else:
        logging.warning("Could not save debug image: %s", filename)


def estimate_brightness(img):
    """Return the mean gray level of a BGR image (used to pick a threshold)."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    brightness = np.mean(gray)
    logging.debug("Estimated brightness: %s", brightness)
    return brightness


def deblur_image(img):
    """Convert a BGR image to grayscale and sharpen it to reduce blur.

    Two passes of a 3x3 sharpening kernel are applied.

    Returns:
        The sharpened grayscale image as a ``uint8`` array.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # The kernel is constant, so build it once instead of once per pass.
    kernel = np.array([[-1, -1, -1],
                       [-1, 9, -1],
                       [-1, -1, -1]])
    for _ in range(2):
        gray = cv2.filter2D(gray, -1, kernel)
        gray = np.clip(gray, 0, 255).astype(np.uint8)
    save_debug_image(gray, "00_deblurred")
    return gray


def preprocess_image(img):
    """Enhance a BGR image for digit detection under adverse conditions.

    Pipeline: PIL contrast/brightness boost -> sharpening (grayscale) ->
    CLAHE -> bilateral filter -> morphological opening. A debug image is
    saved after each stage.

    Returns:
        The processed single-channel image.
    """
    # PIL enhancement
    pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    pil_img = ImageEnhance.Contrast(pil_img).enhance(3.0)  # Extreme contrast
    pil_img = ImageEnhance.Brightness(pil_img).enhance(1.8)  # Strong brightness
    img_enhanced = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    save_debug_image(img_enhanced, "00_preprocessed_pil")

    # Deblur (also converts to grayscale)
    deblurred = deblur_image(img_enhanced)

    # CLAHE for local contrast
    clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(deblurred)
    save_debug_image(enhanced, "00_clahe_enhanced")

    # Noise reduction
    filtered = cv2.bilateralFilter(enhanced, d=17, sigmaColor=200, sigmaSpace=200)
    save_debug_image(filtered, "00_bilateral_filtered")

    # Morphological cleaning
    kernel = np.ones((5, 5), np.uint8)
    filtered = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel, iterations=2)
    save_debug_image(filtered, "00_morph_cleaned")
    return filtered


def normalize_image(img):
    """Resize an image to a height of 1080 px, preserving aspect ratio.

    If the resulting width would be below 480 px, the width is fixed at 480
    and the height is derived from it instead.

    Returns:
        The resized image.
    """
    h, w = img.shape[:2]
    target_height = 1080  # High resolution for small digits
    aspect_ratio = w / h
    target_width = int(target_height * aspect_ratio)
    if target_width < 480:
        target_width = 480
        target_height = int(target_width / aspect_ratio)
    resized = cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
    save_debug_image(resized, "00_normalized")
    logging.debug("Normalized image to %sx%s", target_width, target_height)
    return resized


def tesseract_ocr(img):
    """Fallback OCR using Tesseract, restricted to digits, '.' and '-'.

    Returns:
        The stripped recognized text, or ``None`` if Tesseract raised.
    """
    try:
        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.-'
        text = pytesseract.image_to_string(img, config=config).strip()
        logging.info("Tesseract OCR raw text: %s", text)
        return text
    except Exception as e:
        logging.error("Tesseract OCR failed: %s", str(e))
        return None


def _binarize(processed_img, brightness):
    """Threshold the preprocessed image and close small gaps in the strokes."""
    if brightness > 100:
        thresh = cv2.adaptiveThreshold(processed_img, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 61, 11)
        save_debug_image(thresh, "02_adaptive_threshold")
    else:
        _, thresh = cv2.threshold(processed_img, 10, 255, cv2.THRESH_BINARY_INV)
        save_debug_image(thresh, "02_simple_threshold")

    # Morphological operations
    kernel = np.ones((7, 7), np.uint8)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
    save_debug_image(thresh, "02_morph_cleaned")
    return thresh


def _read_digits(thresh, conf_threshold):
    """Run EasyOCR on the binarized image, falling back to Tesseract.

    Returns:
        The concatenated recognized text (possibly empty).
    """
    results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
                                      contrast_ths=0.05, adjust_contrast=1.5,
                                      text_threshold=0.05, mag_ratio=10.0,
                                      allowlist=_OCR_CHARSET, y_ths=0.8)
    logging.info("EasyOCR results: %s", results)

    fragments = []
    if results:
        # Sort by the x-coordinate of the first box corner for
        # left-to-right reading.
        for _, text, conf in sorted(results, key=lambda r: r[0][0][0]):
            logging.info("EasyOCR detected: %s, Confidence: %s", text, conf)
            if conf > conf_threshold and any(c in _OCR_CHARSET for c in text):
                fragments.append(text)
    else:
        logging.info("EasyOCR found no digits.")
    recognized_text = "".join(fragments)

    if not recognized_text:
        # Tesseract fallback
        tesseract_result = tesseract_ocr(thresh)
        if tesseract_result:
            recognized_text = tesseract_result
            logging.info("Using Tesseract result: %s", recognized_text)
    return recognized_text


def _parse_weight_text(recognized_text):
    """Turn raw OCR text into a normalized weight string and a confidence.

    Returns:
        ``(weight_text, confidence)``; ``("Not detected", 0.0)`` when the text
        does not reduce to a valid number.
    """
    # Minimal cleaning to preserve actual weight
    text = recognized_text.lower().strip()
    text = text.translate(_SEPARATOR_TABLE)
    # Units must be removed BEFORE look-alike letters are mapped to digits;
    # otherwise "kg" turns into "k9" and the unit leaks into the number.
    text = _UNIT_RE.sub("", text)
    text = text.translate(_LOOKALIKE_TABLE)
    text = re.sub(r"[^\d.\-]", "", text)

    if text.count(".") > 1:
        # Keep only the first decimal point.
        head, _, tail = text.partition(".")
        text = head + "." + tail.replace(".", "")
    # Only trailing dots are noise; a leading dot is a real decimal point
    # (".5" means 0.5), so it gets a leading zero instead of being stripped.
    text = text.rstrip(".")
    if text.startswith("."):
        text = "0" + text
    elif text.startswith("-."):
        text = "-0" + text[1:]
    logging.info("Cleaned text: %s", text)

    if not text or text == "-":
        logging.warning("Cleaned text is invalid.")
        return "Not detected", 0.0

    try:
        weight = float(text)
    except ValueError:
        logging.warning("Could not convert '%s' to float.", text)
        return "Not detected", 0.0

    # recognized_text is always non-empty here, so the base confidence is fixed.
    confidence = 80.0
    if weight < -1000 or weight > 2000:
        logging.warning("Weight %s outside typical range, reducing confidence.", weight)
        confidence *= 0.5

    # Trim redundant zeros, keeping the sign out of the way so that
    # "-007.50" normalizes the same way "007.50" does.
    sign = "-" if text.startswith("-") else ""
    int_part, _, dec_part = text[len(sign):].partition(".")
    int_part = int_part.lstrip("0") or "0"
    dec_part = dec_part.rstrip("0")
    text = sign + (int_part + "." + dec_part if dec_part else int_part)

    logging.info("Final detected weight: %s, Confidence: %s%%", text, confidence)
    return text, confidence


def extract_weight_from_image(pil_img):
    """Extract the weight shown in an image of a scale display.

    The image is normalized, preprocessed and binarized, then read with
    EasyOCR (Tesseract as fallback). The recognized text is cleaned into a
    plain decimal string.

    Args:
        pil_img: Input image; anything ``np.array`` can turn into an RGB
            array. Objects exposing ``convert`` (PIL images) are converted
            to RGB first so RGBA / grayscale / palette inputs also work.

    Returns:
        ``(weight_text, confidence)`` where ``confidence`` is a percentage,
        or ``("Not detected", 0.0)`` when no weight could be read or any
        step failed.
    """
    try:
        if hasattr(pil_img, "convert"):
            # COLOR_RGB2BGR below requires exactly three channels.
            pil_img = pil_img.convert("RGB")
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        save_debug_image(img, "00_input_image")

        # Normalize image
        img = normalize_image(img)
        brightness = estimate_brightness(img)
        conf_threshold = 0.1  # Very low threshold for blurry images

        # Preprocess entire image (bypass ROI detection)
        processed_img = preprocess_image(img)
        save_debug_image(processed_img, "01_processed_full")

        thresh = _binarize(processed_img, brightness)
        recognized_text = _read_digits(thresh, conf_threshold)

        logging.info("Raw recognized text: %s", recognized_text)
        if not recognized_text:
            logging.info("No text detected by EasyOCR or Tesseract.")
            return "Not detected", 0.0

        return _parse_weight_text(recognized_text)
    except Exception as e:
        # Top-level boundary: any failure in the pipeline yields "Not detected".
        logging.error("Weight extraction failed unexpectedly: %s", str(e))
        return "Not detected", 0.0