AutoWeightLogger2

Sleeping

App Files Files Community

AutoWeightLogger2 / ocr_engine.py

Sanjayraju30

Update ocr_engine.py

ef265f2 verified 13 days ago

raw

history blame

3.13 kB

	import pytesseract
	import numpy as np
	import cv2
	import re
	from PIL import Image
	import logging

	# Configure logging once at import time: INFO level and above, with each
	# record rendered as "timestamp - level - message".
	logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

	def preprocess_image(img):
	    """Binarize an image so that OCR can read the digits on it.

	    Pipeline: grayscale -> CLAHE contrast enhancement -> Gaussian adaptive
	    threshold (inverted) -> 3x3 morphological opening to drop speckle noise.

	    Args:
	        img: A PIL image or array-like. Colour input is interpreted in RGB
	            channel order; 2-D input is treated as already grayscale.

	    Returns:
	        The single-channel inverted binary image produced by the pipeline.
	        If any step raises, the error is logged and the input is returned
	        without preprocessing (as a NumPy array when the initial conversion
	        succeeded), so callers can still attempt OCR on it.
	    """
	    try:
	        img = np.array(img)

	        # A 2-D array has no channel axis to convert; feeding it to a colour
	        # conversion would raise and skip every step below.
	        if img.ndim == 2:
	            gray = img
	        else:
	            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

	        # Darker frames (mean intensity below 100) get a stronger CLAHE clip
	        # limit so faint digits are still pulled out of the background.
	        brightness = np.mean(gray)
	        clahe_clip = 4.0 if brightness < 100 else 2.0
	        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
	        enhanced = clahe.apply(gray)

	        # The neighbourhood size scales with image height, is clamped to
	        # [11, 31], and the "* 2 + 1" keeps it odd as adaptiveThreshold needs.
	        block_size = max(11, min(31, int(gray.shape[0] / 20) * 2 + 1))
	        thresh = cv2.adaptiveThreshold(
	            enhanced,
	            255,
	            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	            cv2.THRESH_BINARY_INV,
	            block_size,
	            2,
	        )

	        # Morphological opening removes isolated specks left by thresholding.
	        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
	        return cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
	    except Exception as e:
	        # Best effort: report the failure and hand back the unprocessed image.
	        logging.error("Preprocessing failed: %s", e)
	        return img

	def _parse_weight_text(raw_text):
	    """Normalize raw OCR output to a decimal string, or return None if unusable."""
	    cleaned = re.sub(r"[^\d.]", "", raw_text)

	    # OCR noise can produce several dots; keep only the last one as the
	    # decimal point and drop any dot left dangling at either end.
	    extra_dots = cleaned.count('.') - 1
	    if extra_dots > 0:
	        cleaned = cleaned.replace('.', '', extra_dots)
	    cleaned = cleaned.strip('.')

	    # One or more digits, optionally followed by a fractional part.
	    if not re.fullmatch(r"\d+(?:\.\d+)?", cleaned):
	        return None

	    # Strip redundant leading zeros from the integer part only, so that
	    # "007.50" becomes "7.50" while "0.5" stays "0.5" instead of ".5".
	    whole, dot, frac = cleaned.partition('.')
	    whole = whole.lstrip('0') or '0'
	    return whole + dot + frac


	def extract_weight_from_image(pil_img):
	    """Read the numeric weight shown in an image using Tesseract OCR.

	    The image is binarized with ``preprocess_image`` and then passed to
	    Tesseract with a digits-and-dot whitelist, first in single-line mode
	    (``--psm 7``) and then in block mode (``--psm 6``). The first reading
	    that parses to a number in the range 0.001 to 5000 is returned.

	    Args:
	        pil_img: The image to read, in RGB channel order (a PIL image or
	            anything ``preprocess_image`` accepts).

	    Returns:
	        A ``(text, confidence)`` tuple. ``text`` is the normalized decimal
	        string; ``confidence`` is a fixed heuristic (95.0 when the reading
	        has at least three digits after leading zeros are removed, else
	        90.0), not a score reported by Tesseract. When nothing valid is
	        read, or any error occurs, ``("Not detected", 0.0)`` is returned.
	    """
	    try:
	        # Pass the RGB image straight through: preprocess_image performs its
	        # own colour conversion, and converting here as well would swap the
	        # red and blue channels before the grayscale step.
	        thresh = preprocess_image(pil_img)

	        configs = (
	            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
	            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
	        )
	        for config in configs:
	            raw_text = pytesseract.image_to_string(thresh, config=config)
	            logging.info("Tesseract raw output (config %s): %s", config, raw_text)

	            text = _parse_weight_text(raw_text)
	            if text is None:
	                continue

	            digit_count = len(text.lstrip('0').replace('.', ''))
	            confidence = 95.0 if digit_count >= 3 else 90.0

	            # float() cannot fail here: the parser only lets through plain
	            # decimal strings. Out-of-range readings fall to the next config.
	            if 0.001 <= float(text) <= 5000:
	                logging.info(
	                    "Detected weight: %s kg, Confidence: %.2f%%", text, confidence
	                )
	                return text, confidence

	        logging.info("No valid weight detected.")
	        return "Not detected", 0.0
	    except Exception as e:
	        # Top-level boundary: callers always get the sentinel, never a raise.
	        logging.error("Weight extraction failed: %s", e)
	        return "Not detected", 0.0