AutoWeightLogger2

Sleeping

App Files Files Community

AutoWeightLogger2 / ocr_engine.py

Sanjayraju30

Update ocr_engine.py

301eb4d verified 19 days ago

raw

history blame

1.94 kB

	import pytesseract
	import numpy as np
	import cv2
	import re
	import logging
	from PIL import Image

	# Configure the root logger at import time: INFO level, timestamped records.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)

	def preprocess_for_ocr(img):
	    """Binarize a BGR image for OCR and return it inverted.

	    Pipeline: BGR -> grayscale, 5x5 Gaussian blur, Gaussian adaptive
	    threshold (max value 255, block size 11, constant 2), then a bitwise
	    NOT of the thresholded result.

	    Args:
	        img: Image array in BGR channel order (it is passed straight to
	            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

	    Returns:
	        The inverted binary image produced by ``cv2.bitwise_not``.
	    """
	    blur_kernel = (5, 5)
	    max_value = 255
	    block_size = 11
	    offset = 2

	    smoothed = cv2.GaussianBlur(
	        cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), blur_kernel, 0
	    )
	    binary = cv2.adaptiveThreshold(
	        smoothed,
	        max_value,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY,
	        block_size,
	        offset,
	    )

	    # Flip black and white; the original author's intent was white text on
	    # a black background (assumes dark text on a light source image).
	    return cv2.bitwise_not(binary)

	def extract_weight_from_image(pil_img):
	    """Extract a numeric weight reading from an image using pytesseract.

	    The image is converted to OpenCV's BGR layout, run through
	    ``preprocess_for_ocr``, and read by Tesseract as a single text line
	    (``--psm 7``) restricted to digits and ``.``. The OCR text is then
	    cleaned and validated as a plain decimal number.

	    Args:
	        pil_img: Image to read. Objects exposing ``convert`` (PIL images)
	            are normalised to RGB mode first so grayscale, palette and
	            1-bit inputs work; anything else is handed to ``np.array``
	            as-is and is expected to already be an RGB pixel array.

	    Returns:
	        A ``(text, confidence)`` tuple. On success ``text`` is the cleaned
	        numeric string and ``confidence`` is the fixed placeholder ``90.0``
	        (Tesseract's own confidence is not consulted). On any failure --
	        no digits found, malformed number, value outside 0.001..5000, or an
	        exception during processing -- returns ``("Not detected", 0.0)``.
	    """
	    not_detected = ("Not detected", 0.0)
	    try:
	        # Normalise PIL inputs to 3-channel RGB; a 2-D grayscale/palette
	        # array would make the RGB->BGR conversion below raise.
	        if hasattr(pil_img, "convert"):
	            pil_img = pil_img.convert("RGB")
	        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

	        processed_img = preprocess_for_ocr(img)

	        # Single text line, default engine, digits and dot only.
	        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
	        text = pytesseract.image_to_string(processed_img, config=config)

	        # Drop everything except digits and dots (this also removes
	        # whitespace and newlines).
	        text = re.sub(r"[^\d.]", "", text)

	        # With several dots, keep only the last one as the decimal point.
	        extra_dots = text.count('.') - 1
	        if extra_dots > 0:
	            text = text.replace('.', '', extra_dots)

	        if text.startswith('.'):
	            text = '0' + text

	        # One or more digits, optionally followed by a dot and more digits.
	        # (The previous pattern only accepted exactly two digits, so
	        # readings such as "75.5" or "120" were rejected.)
	        if not re.fullmatch(r"\d+\.?\d*", text):
	            return not_detected

	        value = float(text)
	        if 0.001 <= value <= 5000:
	            return text, 90.0  # Return with fixed confidence

	        logging.warning("Detected weight out of range: %s", value)
	        return not_detected

	    except Exception as e:
	        # Boundary handler: OCR failures must not crash the caller.
	        logging.exception("OCR error: %s", e)
	        return not_detected