# AutoWeightLogger1 / ocr_engine.py
# Source: Sanjayraju30's Hugging Face Space — "Update ocr_engine.py",
# commit 12c2109 (verified); raw / history / blame view, 10.2 kB.
import easyocr
import numpy as np
import cv2
import re
import logging
# Set up logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize EasyOCR
easyocr_reader = easyocr.Reader(['en'], gpu=False)
def estimate_brightness(img):
    """Return the mean grayscale intensity of a BGR image.

    Used as a rough proxy for how strongly the digital display is lit.
    """
    return np.mean(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
def detect_roi(img):
    """Crop the image down to the likely digital-display region.

    Bright pixels are thresholded, dilated so neighbouring digits merge
    into one blob, and the largest display-shaped contour (padded by
    40 px per side) is returned. Falls back to the full image when no
    suitable region is found or an error occurs.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Brighter scenes get a stricter cutoff to isolate the lit display.
        cutoff = 230 if estimate_brightness(img) > 100 else 190
        _, bright = cv2.threshold(gray, cutoff, 255, cv2.THRESH_BINARY)
        # Heavy dilation fuses individual digits into one display-sized blob.
        blob = cv2.dilate(bright, np.ones((9, 9), np.uint8), iterations=3)
        found, _ = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        candidates = [c for c in found if cv2.contourArea(c) > 500]
        # Largest blobs first; take the first one that looks like a display.
        for candidate in sorted(candidates, key=cv2.contourArea, reverse=True):
            x, y, w, h = cv2.boundingRect(candidate)
            ratio = w / h
            # Displays are wide rectangles; reject tiny or oddly shaped blobs.
            if 1.5 <= ratio <= 4.0 and w > 50 and h > 30:
                # Pad the crop by 40 px each side, clamped to image bounds.
                x, y = max(0, x - 40), max(0, y - 40)
                w = min(w + 80, img.shape[1] - x)
                h = min(h + 80, img.shape[0] - y)
                return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
def correct_seven_segment(text, bbox, img):
    """Disambiguate '2' vs '6' misreads from a seven-segment display.

    Crops the detected text region out of *img* using EasyOCR's *bbox*
    (four corner points) and compares the white-pixel mass of the upper
    and lower halves: a seven-segment '6' is bottom-heavy because of its
    closed lower loop, while '2' is more evenly distributed.

    Returns the (possibly corrected) text; text without a '2' or '6' is
    returned untouched and *bbox*/*img* are not examined in that case.
    """
    if "2" not in text and "6" not in text:
        return text
    # Take extremes over ALL four corners so the crop is correct even for
    # rotated/skewed quadrilaterals. The previous code assumed an
    # axis-aligned corner ordering (min(x1, x4), max(x2, x3), ...) and
    # produced wrong bounds when EasyOCR returned a tilted box.
    xs = [point[0] for point in bbox]
    ys = [point[1] for point in bbox]
    x_min, y_min = max(0, int(min(xs))), max(0, int(min(ys)))
    x_max = min(img.shape[1], int(max(xs)))
    y_max = min(img.shape[0], int(max(ys)))
    if x_max <= x_min or y_max <= y_min:
        return text
    digit_area = img[y_min:y_max, x_min:x_max]
    if digit_area.size == 0:
        return text
    # Otsu threshold isolates the lit segments from the background.
    gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    h, w = thresh.shape
    upper_pixels = np.sum(thresh[:h // 2, :] == 255)
    lower_pixels = np.sum(thresh[h // 2:, :] == 255)
    # "6" has more pixels in the lower half due to the loop; "2" is balanced.
    if lower_pixels > upper_pixels * 1.5:
        text = text.replace("2", "6")
    else:
        text = text.replace("6", "2")
    return text
def enhance_image(img, mode="standard"):
    """Enhance a BGR image for OCR using one of several preprocessing modes.

    Modes:
      - "seven_segment" / "minimal": Otsu binarization only.
      - "raw": plain grayscale, no further processing.
      - "standard" / "high_contrast" / "low_noise": bilateral denoise +
        CLAHE + adaptive threshold + morphological close + sharpening,
        with per-mode filter strengths.

    All modes end with a dynamic resize toward an 800-px longest side
    (upscale capped at 2x for small images, never upscaled otherwise).
    Returns the processed single-channel image, or the original *img*
    unchanged on error.

    Note: the original implementation tested
    `mode not in ["seven_segment", "minimal", "raw"]` three separate
    times; the pipeline is now expressed as one branch per mode family
    with identical behavior.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        if mode in ("seven_segment", "minimal"):
            # Extremely minimal preprocessing for seven-segment displays.
            _, morphed = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        elif mode == "raw":
            # No preprocessing, just grayscale.
            morphed = gray
        else:
            # Per-mode bilateral-filter / CLAHE strengths.
            if mode == "high_contrast":
                diameter, sigma, clip = 11, 100, 3.0
            elif mode == "low_noise":
                diameter, sigma, clip = 7, 50, 1.5
            else:  # "standard" and any unrecognized mode
                diameter, sigma, clip = 9, 75, 2.0
            denoised = cv2.bilateralFilter(gray, d=diameter, sigmaColor=sigma, sigmaSpace=sigma)
            clahe = cv2.createCLAHE(clipLimit=clip, tileGridSize=(8, 8))
            equalized = clahe.apply(denoised)
            binary = cv2.adaptiveThreshold(equalized, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY, 11, 2)
            # Close small gaps between segments of a digit.
            kernel = np.ones((3, 3), np.uint8)
            morphed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)
            # Sharpen harder on dim images, gently on bright ones.
            sharpen_strength = 3 if estimate_brightness(img) > 100 else 5
            sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
            morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
        # Dynamic resizing toward an 800-px longest side; only images with
        # a longest side under 300 px may be upscaled (capped at 2x).
        h, w = morphed.shape
        target_size = 800
        scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
        if scale_factor != 1.0:
            morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
                                 interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
        return morphed
    except Exception as e:
        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
        return img
def _normalize_ocr_text(text):
    """Normalize a raw OCR string to digits and a decimal point.

    Unit suffixes are stripped FIRST: the original code substituted
    letters before removing "kg", so 'g' had already become '9' and a
    reading like "75.5kg" corrupted into "75.59". Then glyphs that
    seven-segment OCR commonly confuses are mapped to digits, and every
    remaining non-digit/non-dot character is dropped.
    """
    text = text.lower().strip()
    text = text.replace(",", ".").replace(";", ".")
    # Remove unit suffixes before letter->digit substitution (see above).
    text = text.replace("kgs", "").replace("kg", "").replace("k", "")
    for letter, digit in (("o", "0"), ("s", "5"), ("g", "9"), ("l", "1"),
                          ("b", "8"), ("z", "2"), ("q", "9")):
        text = text.replace(letter, digit)
    return re.sub(r"[^\d.]", "", text)


def _score_candidate(weight, conf):
    """Score a parsed weight candidate.

    OCR confidence is weighted toward realistic scale readings
    (0.1-500 kg) and toward two-digit values, which are the most common
    readings on weighing scales.
    """
    range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
    digit_score = 1.5 if 10 <= weight < 100 else 1.0
    return conf * range_score * digit_score


def _format_weight(weight_str):
    """Canonicalize a numeric string: strip leading zeros from the integer
    part and trailing zeros from the decimal part.

    Fix: the original left a dangling dot ("10.00" -> "10."); the dot is
    now dropped when no decimal digits remain.
    """
    if "." in weight_str:
        int_part, dec_part = weight_str.split(".")
        int_part = int_part.lstrip("0") or "0"
        dec_part = dec_part.rstrip("0")
        return f"{int_part}.{dec_part}" if dec_part else int_part
    return weight_str.lstrip("0") or "0"


def extract_weight_from_image(pil_img):
    """Extract a weight reading from a PIL image of a scale display.

    Runs EasyOCR over several enhanced versions of the detected display
    region, keeps the highest-scoring candidate above an adaptive
    confidence threshold, and returns (weight_string, confidence_percent).
    Returns ("Not detected", 0.0) when nothing plausible is found or an
    error occurs.
    """
    try:
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        # Bright, backlit displays warrant a stricter confidence cutoff.
        brightness = estimate_brightness(img)
        conf_threshold = 0.8 if brightness > 100 else 0.6
        roi_img = detect_roi(img)
        # Digit-only settings for binarized modes; looser general settings
        # for the filtered/sharpened modes.
        digit_params = {'contrast_ths': 0.15, 'adjust_contrast': 0.7,
                        'text_threshold': 0.8, 'allowlist': '0123456789.'}
        general_params = {'contrast_ths': 0.1, 'adjust_contrast': 0.5,
                          'text_threshold': 0.7}
        images_to_process = [
            ("seven_segment", enhance_image(roi_img, mode="seven_segment"), digit_params),
            ("minimal", enhance_image(roi_img, mode="minimal"), digit_params),
            ("raw", enhance_image(roi_img, mode="raw"), digit_params),
            ("standard", enhance_image(roi_img, mode="standard"), general_params),
            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), general_params),
            ("low_noise", enhance_image(roi_img, mode="low_noise"), general_params),
        ]
        best_weight, best_conf, best_score = None, 0.0, 0.0
        for mode, proc_img, ocr_params in images_to_process:
            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
            for (bbox, text, conf) in results:
                text = correct_seven_segment(text, bbox, roi_img)
                text = _normalize_ocr_text(text)
                # Accept up to 4 integer digits and up to 3 decimals.
                if not re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                    continue
                try:
                    weight = float(text)
                except ValueError:
                    continue
                score = _score_candidate(weight, conf)
                if score > best_score and conf > conf_threshold:
                    best_weight, best_conf, best_score = text, conf, score
        if not best_weight:
            logging.info("No valid weight detected")
            return "Not detected", 0.0
        return _format_weight(best_weight), round(best_conf * 100, 2)
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0