Spaces:

Sanjayraju30
/

logger

Sleeping

App Files Files Community

logger / ocr_engine.py

Sanjayraju30

Rename ocrr_engine.py to ocr_engine.py

41226bc verified 15 days ago

raw

history blame

4.21 kB

	import easyocr
	import numpy as np
	import cv2
	import re
	from PIL import Image
	import logging
	import sys

	# Send INFO-and-above log records to stdout, prefixed with time and level.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    handlers=[logging.StreamHandler(sys.stdout)],
	)

	# Build the EasyOCR reader a single time at import so every call reuses it.
	# gpu=False keeps inference on the CPU (for CPU-only hosts such as
	# Hugging Face Spaces).
	reader = easyocr.Reader(['en'], gpu=False)

	def preprocess_image(img):
	    """Preprocess an image for EasyOCR, tuned for digital weight displays.

	    The image is reduced to grayscale, contrast-enhanced with CLAHE and
	    binarized with an inverted adaptive Gaussian threshold.

	    Args:
	        img: A PIL image or array-like. It must convert via ``np.array`` to
	            a 2-D (already grayscale) array or a 3-D channels-last array
	            in RGB or RGBA order.

	    Returns:
	        The thresholded single-channel image. If a processing step fails,
	        the error is logged and the most advanced intermediate result is
	        returned instead: the grayscale image when available, otherwise
	        the raw pixel array.

	    Raises:
	        ValueError: If the input does not convert to a 2-D or 3-D array.
	    """
	    pixels = np.array(img)
	    if pixels.ndim not in (2, 3):
	        raise ValueError(
	            f"Expected a 2-D or 3-D image array, got shape {pixels.shape}"
	        )

	    # Always bound, so the except branch can never hit an undefined name.
	    fallback = pixels
	    try:
	        # Convert to grayscale; a 2-D input is already single-channel.
	        if pixels.ndim == 2:
	            gray = pixels
	        elif pixels.shape[2] == 4:
	            gray = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
	        else:
	            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
	        fallback = gray

	        # Enhance contrast for diverse lighting conditions.
	        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
	        enhanced = clahe.apply(gray)

	        # Neighbourhood size scales with image height, clamped to [11, 31];
	        # the "* 2 + 1" keeps it odd, as adaptiveThreshold requires.
	        block_size = max(11, min(31, int(gray.shape[0] / 15) * 2 + 1))
	        return cv2.adaptiveThreshold(
	            enhanced,
	            255,
	            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	            cv2.THRESH_BINARY_INV,
	            block_size,
	            2,
	        )
	    except Exception as e:
	        # Best effort: hand back what we have rather than failing the OCR run.
	        logging.error(f"Preprocessing failed: {str(e)}")
	        return fallback

	# A number, optional whitespace, then an optional "kg"/"g" unit. The lookahead
	# rejects a unit that is merely the start of a longer word (e.g. "grams").
	_WEIGHT_PATTERN = re.compile(r'(-?\d*\.?\d+)\s*((?:kg|g)(?![a-z]))?')
	_MIN_WEIGHT = -5000
	_MAX_WEIGHT = 5000


	def _parse_weight_text(text):
	    """Return ``(weight_str, unit)`` for the first plausible weight in *text*, or None."""
	    cleaned = re.sub(r'\s+', ' ', text.strip().lower())
	    # Treat a comma between digits as a decimal separator (e.g. "68,0").
	    cleaned = re.sub(r'(?<=\d),(?=\d)', '.', cleaned)

	    for found in _WEIGHT_PATTERN.finditer(cleaned):
	        raw = found.group(1)
	        if not _MIN_WEIGHT <= float(raw) <= _MAX_WEIGHT:
	            # An implausible number must not hide a later valid reading.
	            continue
	        # Drop redundant leading zeros but keep one before the decimal
	        # point, so "007" -> "7" and "0.5" stays "0.5" (not ".5").
	        sign = "-" if raw.startswith("-") else ""
	        whole, dot, frac = raw.lstrip("-").partition(".")
	        whole = whole.lstrip("0") or "0"
	        return f"{sign}{whole}{dot}{frac}", found.group(2) or "g"
	    return None


	def extract_weight_from_image(pil_img):
	    """Extract weight and unit from a digital scale image using EasyOCR.

	    The image is preprocessed, OCR'd, and the combined text is scanned for
	    the first number within [-5000, 5000], optionally followed by a
	    "kg" or "g" unit.

	    Args:
	        pil_img: The image to read; passed straight to ``preprocess_image``.

	    Returns:
	        A ``(weight_str, confidence, unit)`` tuple. ``weight_str`` is the
	        detected number as text, ``confidence`` is the lowest EasyOCR
	        confidence among non-empty detections scaled to a percentage, and
	        ``unit`` is "kg" or "g" ("g" when no unit was read). When nothing
	        valid is found, or any step raises, the result is
	        ``("Not detected", 0.0, "")``.
	    """
	    try:
	        processed = preprocess_image(pil_img)

	        # Use EasyOCR to detect text.
	        results = reader.readtext(np.asarray(processed), detail=1, paragraph=False)
	        logging.info(f"EasyOCR raw output: {results}")

	        # Each result's second item is the text, the third its confidence.
	        text = " ".join(entry[1] for entry in results)
	        parsed = _parse_weight_text(text)
	        if parsed is None:
	            logging.info("No valid weight detected.")
	            return "Not detected", 0.0, ""

	        weight_str, unit = parsed
	        # Report the weakest detection so the figure is conservative.
	        confidence = min(
	            (entry[2] for entry in results if entry[1]), default=0.0
	        ) * 100
	        logging.info(
	            f"Detected weight: {float(weight_str)} {unit}, Confidence: {confidence:.2f}%"
	        )
	        return weight_str, confidence, unit
	    except Exception as e:
	        # Top-level boundary: callers always get the sentinel tuple, never a raise.
	        logging.error(f"Weight extraction failed: {str(e)}")
	        return "Not detected", 0.0, ""