AutoWeightLogger1

Running

App Files Files Community

AutoWeightLogger1 / ocr_engine.py

Sanjayraju30

Update ocr_engine.py

d23e846 verified about 18 hours ago

raw

history blame

12.3 kB

	import pytesseract
	import numpy as np
	import cv2
	import re
	import logging
	from datetime import datetime
	import os
	from PIL import Image

	# Configure root logging: timestamp, severity, then the message text
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)

	# Folder that collects intermediate images; created on import if it is missing
	DEBUG_DIR = "debug_images"
	os.makedirs(DEBUG_DIR, exist_ok=True)

	def save_debug_image(img, filename_suffix, prefix=""):
	    """Write an intermediate image into DEBUG_DIR under a timestamped name.

	    Args:
	        img: A PIL ``Image.Image`` or an OpenCV-style array. Colour arrays
	            are written as-is, i.e. they are expected in BGR channel order,
	            which is what the callers visible in this file pass.
	        filename_suffix: Label placed after the timestamp in the file name.
	        prefix: Optional text placed before the timestamp.

	    The file is named ``{prefix}{timestamp}_{filename_suffix}.png``.
	    """
	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
	    if isinstance(img, Image.Image):
	        img.save(filename)
	    # cv2.imwrite expects BGR data; converting to RGB first would store
	    # the file with red and blue exchanged, so the array is written unchanged.
	    elif not cv2.imwrite(filename, img):
	        logging.warning(f"Could not write debug image: {filename}")
	        return
	    logging.info(f"Saved debug image: {filename}")

	def estimate_brightness(img):
	    """Return the mean grayscale intensity of a BGR image."""
	    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    mean_level = np.mean(grayscale)
	    return mean_level

	def preprocess_image(img):
	    """Build the binarised and contrast-enhanced variants used for OCR.

	    Takes a BGR image and returns ``(thresh, enhanced)``: ``enhanced`` is the
	    CLAHE-equalised grayscale image, and ``thresh`` is the inverted adaptive
	    threshold of its blurred version after a morphological open and close.
	    A debug image is saved after the CLAHE, blur and morphology stages.
	    """
	    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    mean_level = estimate_brightness(img)

	    # Contrast boost: frames with mean intensity below 80 get the higher clip limit
	    if mean_level < 80:
	        clip_limit = 6.0
	    else:
	        clip_limit = 4.0
	    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
	    enhanced = equalizer.apply(grayscale)
	    save_debug_image(enhanced, "01_preprocess_clahe")

	    # Light 3x3 Gaussian smoothing
	    smoothed = cv2.GaussianBlur(enhanced, (3, 3), 0)
	    save_debug_image(smoothed, "02_preprocess_blur")

	    # Odd neighbourhood size derived from the image height, clamped to [9, 25]
	    height_based = int(img.shape[0] / 20) * 2 + 1
	    window = max(9, min(25, height_based))
	    binary = cv2.adaptiveThreshold(
	        smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY_INV, window, 7,
	    )

	    # Open (2 passes) then close (3 passes) with a 3x3 rectangular element
	    element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
	    for operation, repeats in ((cv2.MORPH_OPEN, 2), (cv2.MORPH_CLOSE, 3)):
	        binary = cv2.morphologyEx(binary, operation, element, iterations=repeats)
	    save_debug_image(binary, "03_preprocess_morph")
	    return binary, enhanced

	def _fold_line_angles(angles_deg):
	    """Map line angles in degrees onto [-45, 45), treating angles 90 degrees apart as equal."""
	    return (np.asarray(angles_deg, dtype=float) + 45.0) % 90.0 - 45.0

	def correct_rotation(img):
	    """Deskew a BGR image using the dominant direction of its straight edges.

	    Line segments are found with Canny + probabilistic Hough. Each segment's
	    angle is folded into [-45, 45) degrees so that horizontal and vertical
	    edges vote for the same skew; without the fold, an image dominated by
	    vertical edges would yield a median near 90 degrees and be rotated on
	    its side. The image is rotated about its centre by the median folded
	    angle when that exceeds 0.5 degrees in magnitude.

	    Returns:
	        The rotated image (same width and height), or the input image when
	        no lines are found, the skew is negligible, or an error occurs.
	    """
	    try:
	        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	        edges = cv2.Canny(gray, 30, 100, apertureSize=3)
	        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=40, minLineLength=20, maxLineGap=10)
	        if lines is None:
	            return img

	        # Each entry is indexed as lines[i][0] = (x1, y1, x2, y2)
	        segments = lines[:, 0, :]
	        dy = segments[:, 3] - segments[:, 1]
	        dx = segments[:, 2] - segments[:, 0]
	        raw_angles = np.degrees(np.arctan2(dy, dx))
	        angle = float(np.median(_fold_line_angles(raw_angles)))

	        if abs(angle) > 0.5:
	            h, w = img.shape[:2]
	            center = (w // 2, h // 2)
	            M = cv2.getRotationMatrix2D(center, angle, 1.0)
	            img = cv2.warpAffine(img, M, (w, h))
	            save_debug_image(img, "00_rotated_image")
	            logging.info(f"Applied rotation: {angle:.2f} degrees")
	        return img
	    except Exception as e:
	        # Best effort: a failed deskew must not abort the pipeline
	        logging.error(f"Rotation correction failed: {str(e)}")
	        return img

	def detect_roi(img):
	    """Locate the most likely display region in a BGR image.

	    The CLAHE-enhanced image from ``preprocess_image`` is adaptively
	    thresholded at up to three block sizes. Every external contour whose
	    area, aspect ratio, bounding-box size and mean brightness pass the
	    filters becomes a candidate, scored by ``area * brightness``. The best
	    candidate's bounding box is padded and cropped from ``img``.

	    Returns:
	        ``(roi_img, (x, y, w, h))`` for the padded best candidate, or
	        ``(img, None)`` when no candidate qualifies or an error occurs.
	    """
	    try:
	        save_debug_image(img, "04_original")
	        # Only the enhanced image is needed here; the binarised one is unused
	        _, enhanced = preprocess_image(img)
	        brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	        img_h, img_w = img.shape[:2]
	        img_area = img_h * img_w

	        # The clamp to [9, 25] maps all three divisors to 25 once the image is
	        # at least 240 px tall. Identical block sizes yield identical contours,
	        # so each distinct size is processed only once (order preserved).
	        block_sizes = list(dict.fromkeys(
	            max(9, min(25, int(img_h / s) * 2 + 1)) for s in (10, 15, 20)
	        ))
	        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	        valid_contours = []

	        for block_size in block_sizes:
	            temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	                                                cv2.THRESH_BINARY_INV, block_size, 7)
	            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
	            save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
	            contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	            for c in contours:
	                area = cv2.contourArea(c)
	                x, y, w, h = cv2.boundingRect(c)
	                roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
	                aspect_ratio = w / h
	                if (500 < area < (img_area * 0.5) and
	                        0.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40 and roi_brightness > 60):
	                    valid_contours.append((c, area * roi_brightness))
	                    logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")

	        if valid_contours:
	            contour, _ = max(valid_contours, key=lambda item: item[1])
	            x, y, w, h = cv2.boundingRect(contour)
	            # Pad by half the shorter side, limited to 25..70 px, and clip to the image
	            padding = max(25, min(70, int(min(w, h) * 0.5)))
	            x, y = max(0, x - padding), max(0, y - padding)
	            w, h = min(w + 2 * padding, img_w - x), min(h + 2 * padding, img_h - y)
	            roi_img = img[y:y+h, x:x+w]
	            save_debug_image(roi_img, "06_detected_roi")
	            logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
	            return roi_img, (x, y, w, h)

	        logging.info("No ROI found, using full image.")
	        save_debug_image(img, "06_no_roi_fallback")
	        return img, None
	    except Exception as e:
	        logging.error(f"ROI detection failed: {str(e)}")
	        save_debug_image(img, "06_roi_error_fallback")
	        return img, None

	def detect_digit_contour(digit_img, brightness):
	    """Guess a digit from the shape statistics of a single-channel crop.

	    The guess is based on three numbers: the fraction of pixels equal to 255,
	    the width/height ratio of the largest external contour's bounding box,
	    and how much of that box the contour fills. ``brightness`` is accepted
	    but not used. Returns a one-character string, or None when the crop is
	    too small, too empty or too full, matches no rule, or an error occurs.
	    """
	    try:
	        rows, cols = digit_img.shape
	        if rows < 20 or cols < 10:
	            logging.debug("Digit image too small for contour detection.")
	            return None

	        # Share of foreground (value 255) pixels in the crop
	        density = np.sum(digit_img == 255) / digit_img.size
	        if density < 0.1 or density > 0.8:
	            return None

	        # Shape of the largest external contour
	        found, _ = cv2.findContours(digit_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        if not found:
	            return None
	        biggest = max(found, key=cv2.contourArea)
	        _, _, box_w, box_h = cv2.boundingRect(biggest)
	        if box_w < 5 or box_h < 10:
	            return None

	        aspect = box_w / box_h
	        fill = cv2.contourArea(biggest) / (box_w * box_h)

	        # Rules are tried in order; the first rule whose shape test passes decides
	        if 0.2 < aspect < 0.4 and fill > 0.5:
	            return '1'
	        if aspect > 0.5 and fill > 0.6:
	            if density > 0.5:
	                return '8'
	            if density > 0.3:
	                return '0'
	            return None
	        if aspect > 0.4 and fill > 0.5:
	            if density > 0.4:
	                return '3'
	            if density > 0.3:
	                return '2'
	            return None
	        if aspect > 0.3 and fill > 0.4:
	            if density > 0.3:
	                return '5'
	            return '7'
	        if aspect > 0.2 and fill > 0.3:
	            if density > 0.2:
	                return '4'
	            return '9'
	        return None
	    except Exception as e:
	        logging.error(f"Contour digit detection failed: {str(e)}")
	        return None

	def _normalize_reading(raw_text):
	    """Reduce OCR output to a plain decimal string, or None if nothing usable remains.

	    Non-numeric characters are dropped, only the last decimal point is kept,
	    and redundant leading zeros are removed while the zero in front of a
	    decimal point is preserved ("00.50" -> "0.50").
	    """
	    text = re.sub(r"[^\d.]", "", raw_text)
	    surplus_dots = text.count('.') - 1
	    if surplus_dots > 0:
	        text = text.replace('.', '', surplus_dots)
	    text = text.strip('.')
	    # One or more digits with an optional fractional part. The earlier
	    # pattern accepted exactly two digits only, rejecting e.g. "75.5".
	    if not re.fullmatch(r"\d+(?:\.\d+)?", text):
	        return None
	    whole, dot, fraction = text.partition('.')
	    return (whole.lstrip('0') or '0') + dot + fraction

	def perform_ocr(img, roi_bbox):
	    """Read a numeric value from a BGR image.

	    Tesseract is tried first on the CLAHE-enhanced image with a digits-only
	    whitelist. If its output does not normalise to a number, each
	    digit-sized contour of the thresholded image is classified with
	    ``detect_digit_contour`` instead.

	    Args:
	        img: BGR image to read.
	        roi_bbox: Accepted for interface compatibility; not used.

	    Returns:
	        ``(text, confidence)`` where ``text`` is a decimal string and
	        ``confidence`` a fixed score (98/95 for Tesseract, 92/90 for the
	        contour fallback; the higher value applies to readings with three
	        or more digits), or ``(None, 0.0)`` when nothing valid is found or
	        an error occurs.
	    """
	    try:
	        thresh, enhanced = preprocess_image(img)
	        brightness = estimate_brightness(img)
	        pil_img = Image.fromarray(enhanced)
	        save_debug_image(pil_img, "07_ocr_input")

	        # Tesseract restricted to digits and the decimal point, single text line
	        custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
	        raw = pytesseract.image_to_string(pil_img, config=custom_config)
	        logging.info(f"Tesseract raw output: {raw}")

	        text = _normalize_reading(raw)
	        if text is not None:
	            confidence = 98.0 if len(text.replace('.', '')) >= 3 else 95.0
	            logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
	            return text, confidence

	        # Fallback: classify digit-sized contours from left to right
	        logging.info("Tesseract failed, using contour-based detection.")
	        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        digits_info = []
	        for c in contours:
	            x, y, w, h = cv2.boundingRect(c)
	            if w > 15 and h > 20 and 0.2 <= w / h <= 1.2:
	                digits_info.append((x, x + w, y, y + h))

	        if digits_info:
	            digits_info.sort(key=lambda box: box[0])
	            max_y, max_x = thresh.shape[:2]
	            pieces = []
	            prev_x_max = None
	            for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
	                x_min, y_min = max(0, x_min), max(0, y_min)
	                x_max, y_max = min(max_x, x_max), min(max_y, y_max)
	                if x_max <= x_min or y_max <= y_min:
	                    continue
	                digit_crop = thresh[y_min:y_max, x_min:x_max]
	                save_debug_image(digit_crop, f"08_digit_crop_{idx}")
	                digit = detect_digit_contour(digit_crop, brightness)
	                if digit:
	                    pieces.append(digit)
	                elif prev_x_max is not None and x_min - prev_x_max < 15:
	                    # Unrecognised blob close to the previous one: treat it as a decimal point
	                    pieces.append('.')
	                prev_x_max = x_max

	            text = _normalize_reading("".join(pieces))
	            if text is not None:
	                confidence = 92.0 if len(text.replace('.', '')) >= 3 else 90.0
	                logging.info(f"Validated contour text: {text}, Confidence: {confidence:.2f}%")
	                return text, confidence

	        logging.info("No valid digits detected.")
	        return None, 0.0
	    except Exception as e:
	        logging.error(f"OCR failed: {str(e)}")
	        return None, 0.0

	def extract_weight_from_image(pil_img):
	    """Extract the weight shown on a digital scale from an RGB PIL image.

	    The image is converted to BGR, deskewed, and cropped to the detected
	    display region. OCR runs on that region first and, if it does not
	    produce an acceptable reading, on the full image with a threshold
	    relaxed by 5 %. A reading is accepted when its confidence meets the
	    threshold and its value lies in [0.01, 1000].

	    Returns:
	        ``(weight_text, confidence)`` on success, otherwise
	        ``("Not detected", 0.0)`` (also on any internal error).
	    """
	    try:
	        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
	        save_debug_image(img, "00_input_image")
	        img = correct_rotation(img)
	        brightness = estimate_brightness(img)
	        # Minimum accepted confidence in percent; brighter frames are held to a higher bar
	        min_confidence = 90.0 if brightness > 100 else 70.0

	        roi_img, roi_bbox = detect_roi(img)
	        if roi_bbox:
	            roi_area = roi_bbox[2] * roi_bbox[3]
	            img_area = img.shape[0] * img.shape[1]
	            # A region covering more than 30 % of the frame is held to a stricter bar.
	            # NOTE(review): the factors were previously assigned as the threshold
	            # itself (100 % / 115 %), which the fixed confidences returned by
	            # perform_ocr (at most 98.0) could never reach. They are applied
	            # here as multipliers and capped at 95 % -- confirm the intent.
	            strictness = 1.15 if roi_area > img_area * 0.3 else 1.0
	            min_confidence = min(min_confidence * strictness, 95.0)

	        result, confidence = perform_ocr(roi_img, roi_bbox)
	        if result and confidence >= min_confidence:
	            try:
	                weight = float(result)
	                if 0.01 <= weight <= 1000:
	                    logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
	                    return result, confidence
	                logging.warning(f"Weight {result} out of range.")
	            except ValueError:
	                logging.warning(f"Invalid weight format: {result}")

	        logging.info("Primary OCR failed, using full image fallback.")
	        result, confidence = perform_ocr(img, None)
	        if result and confidence >= min_confidence * 0.95:
	            try:
	                weight = float(result)
	                if 0.01 <= weight <= 1000:
	                    logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
	                    return result, confidence
	                logging.warning(f"Full image weight {result} out of range.")
	            except ValueError:
	                logging.warning(f"Invalid full image weight format: {result}")

	        logging.info("No valid weight detected.")
	        return "Not detected", 0.0
	    except Exception as e:
	        logging.error(f"Weight extraction failed: {str(e)}")
	        return "Not detected", 0.0