AutoWeightLogger1

Running

App Files Files Community

AutoWeightLogger1 / ocr_engine.py

Sanjayraju30

Update ocr_engine.py

204176c verified 4 days ago

raw

history blame

19.6 kB

	import easyocr
	import numpy as np
	import cv2
	import re
	import logging
	from datetime import datetime
	import os
	from PIL import Image, ImageEnhance
	from scipy.signal import convolve2d

	# Configure the root logger at import time: DEBUG level, each record
	# formatted as "<timestamp> - <level> - <message>".
	logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

	# Module-wide EasyOCR reader for English text, created once at import time.
	# NOTE: the reader is constructed with gpu=False, so GPU use is not requested.
	easyocr_reader = easyocr.Reader(['en'], gpu=False)

	# Directory that save_debug_image() writes into; created at import time if
	# it does not exist yet (exist_ok=True makes repeated imports harmless).
	DEBUG_DIR = "debug_images"
	os.makedirs(DEBUG_DIR, exist_ok=True)

	def save_debug_image(img, filename_suffix, prefix=""):
	    """Write ``img`` into ``DEBUG_DIR`` as a timestamped PNG.

	    The file name is ``<prefix><YYYYmmdd_HHMMSS_micro>_<filename_suffix>.png``.

	    Args:
	        img: Image array handed straight to ``cv2.imwrite``.
	        filename_suffix: Text appended after the timestamp.
	        prefix: Optional text placed before the timestamp.

	    Returns:
	        None. The outcome is only reported through the log.
	    """
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
	    # Colour and greyscale arrays are written by the same call, so no
	    # branching on the number of channels is needed. cv2.imwrite reports
	    # failure through its boolean return value rather than an exception,
	    # so check it instead of claiming success unconditionally.
	    if cv2.imwrite(filename, img):
	        logging.debug(f"Saved debug image: {filename}")
	    else:
	        logging.warning(f"Failed to save debug image: {filename}")

	def estimate_brightness(img):
	    """Return the mean grey level of ``img``.

	    Args:
	        img: Either a 2-D greyscale array, which is used as-is, or a
	            multi-channel array that is converted with
	            ``cv2.COLOR_BGR2GRAY`` first.

	    Returns:
	        The arithmetic mean of the greyscale pixel values.
	    """
	    # A single-channel image has no BGR channels to convert; passing it to
	    # cvtColor(BGR2GRAY) would raise, so use it directly.
	    if img.ndim == 2:
	        gray = img
	    else:
	        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    brightness = np.mean(gray)
	    logging.debug(f"Estimated brightness: {brightness}")
	    return brightness

	def deblur_image(img):
	    """Divide a greyscale copy of ``img`` by its 5x5 box-blurred version.

	    The image is converted from BGR to greyscale and scaled to [0, 1].
	    It is then smoothed with a 5x5 averaging kernel, and every pixel is
	    divided by its smoothed value. The quotient is multiplied by 255,
	    clipped to [0, 255] and cast to ``uint8``. The result is also written
	    out as a debug image.

	    Args:
	        img: BGR image array.

	    Returns:
	        The processed single-channel ``uint8`` array.
	    """
	    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    scaled = grayscale.astype(np.float32) / 255.0

	    # 5x5 averaging kernel used as the assumed point spread function.
	    box_kernel = np.ones((5, 5)) / 25
	    smoothed = convolve2d(scaled, box_kernel, mode='same')

	    # Replace exact zeros so the division below cannot divide by zero.
	    safe_divisor = np.where(smoothed == 0, 1e-10, smoothed)
	    quotient = scaled / safe_divisor

	    result = np.clip(quotient * 255, 0, 255).astype(np.uint8)
	    save_debug_image(result, "00_deblurred")
	    return result

	def preprocess_image(img):
	    """Produce a contrast-boosted, denoised greyscale version of ``img``.

	    Pipeline, with a debug image saved after each stage except the first:
	    ``deblur_image`` -> PIL contrast x2.5 -> PIL brightness x1.5 ->
	    CLAHE (clip limit 3.0, 8x8 tiles) -> bilateral filter.

	    Args:
	        img: BGR image array (passed on to ``deblur_image``).

	    Returns:
	        The filtered single-channel image array.
	    """
	    # Stage 1: deblurring, which also yields the greyscale image.
	    sharpened = deblur_image(img)

	    # Stage 2: global contrast and brightness boost via PIL.
	    boosted = ImageEnhance.Contrast(Image.fromarray(sharpened)).enhance(2.5)
	    boosted = ImageEnhance.Brightness(boosted).enhance(1.5)
	    boosted_arr = np.array(boosted)
	    save_debug_image(boosted_arr, "00_preprocessed_pil")

	    # Stage 3: local contrast equalisation.
	    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
	    equalized = equalizer.apply(boosted_arr)
	    save_debug_image(equalized, "00_clahe_enhanced")

	    # Stage 4: edge-preserving noise reduction.
	    denoised = cv2.bilateralFilter(equalized, d=15, sigmaColor=150, sigmaSpace=150)
	    save_debug_image(denoised, "00_bilateral_filtered")
	    return denoised

	def normalize_image(img):
	    """Rescale ``img`` to a height of 720 px, keeping its aspect ratio.

	    If the width that results would be below 320 px, the width is pinned
	    to 320 px instead and the height is recomputed from the aspect ratio.
	    The resized image is saved as a debug image.

	    Args:
	        img: Image array; only its first two dimensions are read.

	    Returns:
	        The image resized with cubic interpolation.
	    """
	    src_h, src_w = img.shape[:2]
	    ratio = src_w / src_h

	    out_h = 720
	    out_w = int(out_h * ratio)
	    if out_w < 320:
	        # Very narrow input: enforce the minimum width and let the height grow.
	        out_w = 320
	        out_h = int(out_w / ratio)

	    scaled = cv2.resize(img, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
	    save_debug_image(scaled, "00_normalized")
	    logging.debug(f"Normalized image to {out_w}x{out_h}")
	    return scaled

	def detect_roi(img):
	    """Locate the display region in ``img``, falling back to the full image.

	    The image is preprocessed and thresholded: adaptive threshold when the
	    estimated brightness exceeds 120, fixed threshold of 20 otherwise.
	    The mask is morphologically closed and dilated, and external contours
	    are extracted. Contours that pass the size and aspect-ratio filters
	    are candidates. The largest candidate's bounding box, padded by 120 px
	    and clamped to the image, is the ROI.

	    Args:
	        img: BGR image array.

	    Returns:
	        ``(roi_img, (x, y, w, h))`` when a region is found, otherwise
	        ``(img, None)``. Any exception is logged and also yields
	        ``(img, None)``.
	    """
	    try:
	        save_debug_image(img, "01_original")
	        gray = preprocess_image(img)
	        save_debug_image(gray, "02_preprocessed_grayscale")

	        # Pick the thresholding strategy from the overall brightness.
	        if estimate_brightness(img) > 120:
	            # Inverted adaptive threshold for bright displays.
	            mask = cv2.adaptiveThreshold(
	                gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	                cv2.THRESH_BINARY_INV, 41, 7)
	            save_debug_image(mask, "03_roi_adaptive_threshold_high")
	        else:
	            # Low fixed threshold for dim displays.
	            _, mask = cv2.threshold(gray, 20, 255, cv2.THRESH_BINARY_INV)
	            save_debug_image(mask, "03_roi_simple_threshold_low")

	        # Close gaps so the individual digits merge into one blob.
	        mask = cv2.morphologyEx(
	            mask, cv2.MORPH_CLOSE, np.ones((7, 7), np.uint8), iterations=3)
	        save_debug_image(mask, "03_roi_morph_cleaned")

	        grown = cv2.dilate(mask, np.ones((15, 15), np.uint8), iterations=6)
	        save_debug_image(grown, "04_roi_dilated")

	        contours, _ = cv2.findContours(
	            grown, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	        candidates = []
	        if contours:
	            frame_area = img.shape[0] * img.shape[1]
	            for contour in contours:
	                # Deliberately loose area filter: drop specks and
	                # (almost) full-frame blobs only.
	                if not 100 < cv2.contourArea(contour) < (frame_area * 0.999):
	                    continue
	                _, _, box_w, box_h = cv2.boundingRect(contour)
	                shape_ratio = box_w / box_h if box_h > 0 else 0
	                if 0.3 <= shape_ratio <= 15.0 and box_w > 20 and box_h > 10:
	                    candidates.append(contour)

	        if candidates:
	            largest = max(candidates, key=cv2.contourArea)
	            x, y, w, h = cv2.boundingRect(largest)
	            margin = 120  # generous padding around the detected blob
	            x = max(0, x - margin)
	            y = max(0, y - margin)
	            # Clamp the padded size so the crop stays inside the image.
	            w = min(w + 2 * margin, img.shape[1] - x)
	            h = min(h + 2 * margin, img.shape[0] - y)
	            roi_img = img[y:y+h, x:x+w]
	            save_debug_image(roi_img, "05_detected_roi")
	            logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
	            return roi_img, (x, y, w, h)

	        logging.info("No suitable ROI found, returning full image.")
	        save_debug_image(img, "05_no_roi_full_fallback")
	        return img, None
	    except Exception as e:
	        logging.error(f"ROI detection failed: {str(e)}")
	        save_debug_image(img, "05_roi_detection_error_fallback")
	        return img, None

	def detect_segments(digit_img):
	    """Classify a binarised digit crop by its seven-segment pattern.

	    The crop is divided into seven overlapping regions, one per segment.
	    A segment counts as lit when more than 25% of its pixels equal 255.
	    Each digit 0-9 is then scored as: lit segments that belong to its
	    pattern, minus lit segments that do not, plus 0.5 when its whole
	    pattern is lit. The highest score wins. On a tie the digit with fewer
	    stray lit segments wins, otherwise the earlier digit is kept.

	    Args:
	        digit_img: 2-D array holding a single digit, with foreground
	            pixels set to 255.

	    Returns:
	        The best-matching digit as a one-character string, or ``None``
	        when the crop is smaller than 3x6 (width x height).
	    """
	    h, w = digit_img.shape
	    if h < 6 or w < 3:  # Extremely relaxed size constraints
	        logging.debug(f"Digit image too small: {w}x{h}")
	        return None

	    # (x1, x2, y1, y2) of each segment's region, as fractions of the crop.
	    segments = {
	        'top': (int(w * 0.05), int(w * 0.95), 0, int(h * 0.3)),
	        'middle': (int(w * 0.05), int(w * 0.95), int(h * 0.35), int(h * 0.65)),
	        'bottom': (int(w * 0.05), int(w * 0.95), int(h * 0.7), h),
	        'left_top': (0, int(w * 0.35), int(h * 0.05), int(h * 0.55)),
	        'left_bottom': (0, int(w * 0.35), int(h * 0.45), int(h * 0.95)),
	        'right_top': (int(w * 0.65), w, int(h * 0.05), int(h * 0.55)),
	        'right_bottom': (int(w * 0.65), w, int(h * 0.45), int(h * 0.95)),
	    }

	    segment_presence = {}
	    for name, (x1, x2, y1, y2) in segments.items():
	        x1, y1 = max(0, x1), max(0, y1)
	        x2, y2 = min(w, x2), min(h, y2)
	        region = digit_img[y1:y2, x1:x2]
	        if region.size == 0:
	            segment_presence[name] = False
	            continue
	        pixel_count = np.sum(region == 255)
	        total_pixels = region.size
	        fill_ratio = pixel_count / total_pixels
	        segment_presence[name] = fill_ratio > 0.25  # Very low threshold
	        logging.debug(f"Segment {name}: {pixel_count}/{total_pixels} = {fill_ratio:.2f}")

	    digit_patterns = {
	        '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
	        '1': ('right_top', 'right_bottom'),
	        '2': ('top', 'middle', 'bottom', 'left_bottom', 'right_top'),
	        '3': ('top', 'middle', 'bottom', 'right_top', 'right_bottom'),
	        '4': ('middle', 'left_top', 'right_top', 'right_bottom'),
	        '5': ('top', 'middle', 'bottom', 'left_top', 'right_bottom'),
	        '6': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'),
	        '7': ('top', 'right_top', 'right_bottom'),
	        '8': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
	        '9': ('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom'),
	    }

	    lit_segments = [name for name, lit in segment_presence.items() if lit]

	    best_match = None
	    max_score = -1
	    best_penalty = 0
	    for digit, pattern in digit_patterns.items():
	        matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
	        # Lit segments this digit should not have.
	        penalty = sum(1 for segment in lit_segments if segment not in pattern)
	        current_score = matches - penalty
	        if matches == len(pattern):
	            # Bonus for a complete pattern, so e.g. '1' beats '7' when
	            # only the two right-hand segments are lit.
	            current_score += 0.5

	        is_better = current_score > max_score
	        # Tie-break: prefer the digit with fewer stray lit segments.
	        wins_tie = (
	            current_score == max_score
	            and best_match is not None
	            and penalty < best_penalty
	        )
	        if is_better or wins_tie:
	            max_score = current_score
	            best_match = digit
	            best_penalty = penalty

	    logging.debug(f"Segment presence: {segment_presence}, Detected digit: {best_match}")
	    return best_match

	def custom_seven_segment_ocr(img, roi_bbox):
	    """Read a seven-segment display by combining EasyOCR with segment analysis.

	    The image is preprocessed, thresholded (Otsu when the estimated
	    brightness exceeds 120, fixed threshold of 15 otherwise) and
	    morphologically opened. EasyOCR is run on the mask with a
	    digits/dot/minus allowlist. Only detections of at most two characters
	    with a box taller than 3 px are kept, ordered left to right. A
	    single-character detection with confidence <= 0.7 is re-classified
	    with ``detect_segments``; every other detection keeps EasyOCR's text.

	    Args:
	        img: BGR image array of the region to read.
	        roi_bbox: Bounding box of the region. It is accepted for interface
	            compatibility and not used by this function.

	    Returns:
	        The concatenated recognised characters, or ``None`` when nothing
	        was recognised or an exception occurred (the exception is logged).
	    """
	    try:
	        gray = preprocess_image(img)
	        brightness = estimate_brightness(img)
	        # Multiple thresholding approaches
	        if brightness > 120:
	            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
	            save_debug_image(thresh, "06_roi_otsu_threshold")
	        else:
	            _, thresh = cv2.threshold(gray, 15, 255, cv2.THRESH_BINARY_INV)  # Very low threshold
	            save_debug_image(thresh, "06_roi_simple_threshold")

	        # Morphological cleaning
	        kernel = np.ones((5, 5), np.uint8)
	        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
	        save_debug_image(thresh, "06_roi_morph_cleaned")

	        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
	                                          contrast_ths=0.05, adjust_contrast=1.2,
	                                          text_threshold=0.2, mag_ratio=6.0,
	                                          allowlist='0123456789.-', y_ths=0.7)

	        logging.info(f"Custom OCR EasyOCR results: {results}")
	        if not results:
	            logging.info("Custom OCR EasyOCR found no digits.")
	            return None

	        digits_info = []
	        for bbox, text, conf in results:
	            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
	            h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
	            if len(text) <= 2 and any(c in '0123456789.-' for c in text) and h_bbox > 3:
	                x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
	                y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
	                digits_info.append((x_min, x_max, y_min, y_max, text, conf))

	        # Read the display left to right.
	        digits_info.sort(key=lambda info: info[0])

	        mask_h, mask_w = thresh.shape[0], thresh.shape[1]
	        pieces = []
	        for idx, (x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf) in enumerate(digits_info):
	            # Clamp the box to the mask and skip degenerate crops.
	            x_min, y_min = max(0, x_min), max(0, y_min)
	            x_max, y_max = min(mask_w, x_max), min(mask_h, y_max)
	            if x_max <= x_min or y_max <= y_min:
	                continue
	            digit_img_crop = thresh[y_min:y_max, x_min:x_max]
	            save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")

	            # detect_segments() yields exactly one digit, so it can only
	            # stand in for a one-character read. Substituting it for a
	            # two-character read would silently drop a character.
	            trust_easyocr = (
	                easyocr_conf > 0.7
	                or easyocr_char in '.-'
	                or len(easyocr_char) != 1
	                or digit_img_crop.shape[0] < 6
	                or digit_img_crop.shape[1] < 3
	            )
	            if trust_easyocr:
	                pieces.append(easyocr_char)
	            else:
	                digit_from_segments = detect_segments(digit_img_crop)
	                pieces.append(digit_from_segments or easyocr_char)

	        recognized_text = "".join(pieces)
	        logging.info(f"Custom OCR before validation, recognized_text: {recognized_text}")
	        if recognized_text:
	            return recognized_text
	        logging.info(f"Custom OCR text '{recognized_text}' is empty.")
	        return None
	    except Exception as e:
	        logging.error(f"Custom seven-segment OCR failed: {str(e)}")
	        return None

	def extract_weight_from_image(pil_img):
	    """Extract the weight shown on a digital scale display.

	    The image is normalised in size and the display region is located
	    with ``detect_roi``. ``custom_seven_segment_ocr`` is tried first, and
	    a result that parses as a number is returned with a fixed confidence
	    of 90.0. Otherwise EasyOCR is run on a thresholded version of the
	    region, and the candidate with the best heuristic score whose
	    confidence exceeds the brightness-dependent threshold is returned.

	    Args:
	        pil_img: PIL image of the display, in any mode (it is converted
	            to RGB before processing).

	    Returns:
	        ``(weight_text, confidence_percent)``. When nothing usable is
	        found, or any exception occurs, ``("Not detected", 0.0)``.
	    """
	    try:
	        # Force three channels: greyscale or palette images would make the
	        # RGB->BGR conversion below fail.
	        img = np.array(pil_img.convert("RGB"))
	        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
	        save_debug_image(img, "00_input_image")

	        # Normalize image dimensions
	        img = normalize_image(img)
	        brightness = estimate_brightness(img)
	        conf_threshold = 0.2 if brightness > 120 else 0.1

	        roi_img, roi_bbox = detect_roi(img)
	        custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
	        if custom_result:
	            logging.info(f"Raw custom OCR result: {custom_result}")
	            # Minimal cleaning; negative signs are allowed.
	            text = _NON_NUMERIC_RE.sub("", custom_result)
	            if text.count('.') > 1:
	                # Drop all but the last decimal point.
	                text = text.replace('.', '', text.count('.') - 1)
	            if text:
	                if text.startswith('.'):
	                    text = "0" + text
	                if text.endswith('.'):
	                    text = text.rstrip('.')
	                if text == '.' or text == '':
	                    logging.warning(f"Custom OCR result '{text}' is invalid after cleaning.")
	                else:
	                    try:
	                        float(text)  # validation only
	                    except ValueError:
	                        logging.warning(f"Custom OCR result '{text}' is not a valid number, falling back.")
	                    else:
	                        logging.info(f"Custom OCR result: {text}, Confidence: 90.0%")
	                        return text, 90.0
	            logging.warning(f"Custom OCR result '{custom_result}' failed cleaning, falling back.")

	        logging.info("Custom OCR failed or invalid, falling back to general EasyOCR.")
	        processed_roi_img = preprocess_image(roi_img)

	        # Multiple thresholding approaches
	        if brightness > 120:
	            thresh = cv2.adaptiveThreshold(processed_roi_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	                                           cv2.THRESH_BINARY_INV, 51, 9)
	            save_debug_image(thresh, "09_fallback_adaptive_thresh")
	        else:
	            _, thresh = cv2.threshold(processed_roi_img, 15, 255, cv2.THRESH_BINARY_INV)
	            save_debug_image(thresh, "09_fallback_simple_thresh")

	        # Morphological cleaning
	        kernel = np.ones((5, 5), np.uint8)
	        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
	        save_debug_image(thresh, "09_fallback_morph_cleaned")

	        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
	                                          contrast_ths=0.05, adjust_contrast=1.2,
	                                          text_threshold=0.1, mag_ratio=7.0,
	                                          allowlist='0123456789.-', batch_size=4, y_ths=0.8)

	        best_weight = None
	        best_conf = 0.0
	        best_score = 0.0
	        for bbox, raw_text, conf in results:
	            logging.info(f"Fallback EasyOCR raw text: {raw_text}, Confidence: {conf}")
	            text = _clean_fallback_text(raw_text)
	            if not text.replace('.', '').replace('-', ''):
	                continue  # no digits left after cleaning
	            try:
	                weight = float(text)
	            except ValueError:
	                logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
	                continue
	            score = _score_candidate(weight, text, conf, bbox, roi_bbox)
	            if score > best_score and conf > conf_threshold:
	                best_weight = text
	                best_conf = conf
	                best_score = score
	                logging.info(f"Candidate EasyOCR weight: '{text}', Conf: {conf}, Score: {score}")

	        if not best_weight:
	            logging.info("No valid weight detected after all attempts.")
	            return "Not detected", 0.0

	        best_weight = _format_weight(best_weight)
	        logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
	        return best_weight, round(best_conf * 100, 2)

	    except Exception as e:
	        logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
	        return "Not detected", 0.0


	# Look-alike characters mapped to the digit or decimal point they most
	# likely stand for; ``None`` deletes the character. Only lowercase keys are
	# needed because the text is lower-cased before translation.
	_OCR_LOOKALIKES = str.maketrans({
	    ",": ".", ";": ".", ":": ".",
	    " ": None, "~": None, "`": None,
	    "o": "0", "q": "0",
	    "s": "5",
	    "g": "9",
	    "l": "1", "|": "1",
	    "b": "8",
	    "z": "2",
	    "a": "4",
	    "e": "3",
	    "t": "7",
	})
	# Weight-unit tokens that end at a word boundary.
	_UNIT_SUFFIX_RE = re.compile(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b")
	# Anything that is not a digit, decimal point or minus sign.
	_NON_NUMERIC_RE = re.compile(r"[^\d\.\-]")


	def _clean_fallback_text(raw_text):
	    """Reduce raw OCR text to a candidate number string (may be empty)."""
	    text = raw_text.lower().strip()
	    # Units must go first: the look-alike table turns g/s/l/b into digits,
	    # after which "kg", "lbs" etc. could no longer be recognised.
	    text = _UNIT_SUFFIX_RE.sub("", text)
	    text = text.translate(_OCR_LOOKALIKES)
	    text = _NON_NUMERIC_RE.sub("", text)
	    if text.count('.') > 1:
	        # Keep the first decimal point and merge the remaining parts.
	        head, *tail = text.split('.')
	        text = head + '.' + ''.join(tail)
	    text = text.rstrip('.')
	    if text.startswith('.'):
	        # ".5" means 0.5; stripping the dot would turn it into 5.
	        text = "0" + text
	    return text


	def _score_candidate(weight, text, conf, bbox, roi_bbox):
	    """Weight the OCR confidence by value range, digit count and box size."""
	    if -1000 <= weight <= 1000:  # Allow negative weights
	        range_score = 1.5
	    elif 1000 < weight <= 2000:
	        range_score = 1.0
	    else:
	        range_score = 0.5

	    digit_count = len(text.replace('.', '').replace('-', ''))
	    if 2 <= digit_count <= 6:
	        digit_score = 1.3
	    elif digit_count == 1:
	        digit_score = 0.8
	    else:
	        digit_score = 1.0

	    score = conf * range_score * digit_score
	    if roi_bbox:
	        _, _, w_roi, h_roi = roi_bbox
	        roi_area = w_roi * h_roi
	        xs = [point[0] for point in bbox]
	        ys = [point[1] for point in bbox]
	        box_w = int(max(xs)) - int(min(xs))
	        box_h = int(max(ys)) - int(min(ys))
	        # Penalise text boxes that are tiny relative to the ROI.
	        if roi_area > 0 and (box_w * box_h) / roi_area < 0.01:
	            score *= 0.5
	        # Penalise extremely thin boxes.
	        box_aspect_ratio = box_w / box_h if box_h > 0 else 0
	        if box_aspect_ratio < 0.05:
	            score *= 0.7
	    return score


	def _format_weight(text):
	    """Strip redundant leading and trailing zeros from a numeric string."""
	    # Set the sign aside so leading zeros behind a minus are trimmed too.
	    sign = "-" if text.startswith("-") else ""
	    int_part, _, dec_part = text[len(sign):].partition(".")
	    int_part = int_part.lstrip("0") or "0"
	    dec_part = dec_part.rstrip("0")
	    formatted = f"{int_part}.{dec_part}" if dec_part else int_part
	    # Avoid reporting a signed zero.
	    return formatted if formatted == "0" else sign + formatted