# Page-header residue captured with this file (not code): "Spaces: Sleeping / Sleeping"
import pytesseract | |
import numpy as np | |
import cv2 | |
import re | |
import logging | |
from PIL import Image | |
# Set up logging | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
def preprocess_for_ocr(img):
    """Binarise a BGR image so it can be handed to the OCR engine.

    The pipeline is: grayscale conversion -> 5x5 Gaussian blur -> adaptive
    Gaussian threshold -> bitwise inversion.

    Args:
        img: Image array in OpenCV BGR channel order (it is passed straight
            to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        The inverted binary image produced by the threshold step.
    """
    # Smooth the grayscale image before thresholding to suppress pixel noise.
    smoothed = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0)

    # Positional arguments: max value 255, Gaussian-weighted neighbourhood,
    # plain binary output, block size 11, constant 2 subtracted from the mean.
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    # Flip black and white so that strokes which were darker than their
    # surroundings come out white on a black background.
    return cv2.bitwise_not(binary)
def _normalize_weight_text(raw_text):
    """Reduce raw OCR output to a decimal string ('' when nothing usable remains)."""
    # Keeping only digits and dots also discards whitespace and newlines.
    text = re.sub(r"[^\d.]", "", raw_text)
    surplus_dots = text.count('.') - 1
    if surplus_dots > 0:
        # Remove the leading dots so only the last one survives as the
        # decimal point (e.g. "1.2.3" -> "12.3").
        text = text.replace('.', '', surplus_dots)
    if text.startswith('.'):
        text = '0' + text
    return text


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image using pytesseract.

    The image is converted to an OpenCV BGR array, passed through
    ``preprocess_for_ocr``, and recognised with a digits-and-dot whitelist.
    The recognised text is normalised to a decimal string and accepted only
    when its numeric value lies in the range [0.001, 5000].

    Args:
        pil_img: Image to read. Objects exposing a ``convert`` method (PIL
            images) are converted to RGB first, so grayscale, palette and
            RGBA inputs are handled. Anything else is passed to ``np.array``
            unchanged and is expected to be a 3-channel RGB array.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is the normalised
        reading and ``confidence`` is 90.0, a fixed placeholder rather than a
        score reported by the OCR engine. Otherwise ``("Not detected", 0.0)``.
        Exceptions raised during processing are logged and reported as
        "Not detected" instead of propagating.
    """
    try:
        # Normalise the colour mode up front: np.array() of a non-RGB PIL
        # image is not a 3-channel array and cvtColor(RGB2BGR) would reject it.
        convert = getattr(pil_img, "convert", None)
        if callable(convert):
            pil_img = convert("RGB")

        # PIL uses RGB ordering; the OpenCV pipeline expects BGR.
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        processed_img = preprocess_for_ocr(img)

        # --psm 7 treats the image as a single text line; the whitelist
        # restricts recognition to digits and the decimal point.
        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
        raw_text = pytesseract.image_to_string(processed_img, config=config)

        text = _normalize_weight_text(raw_text)
        if text and re.fullmatch(r"\d*\.?\d*", text):
            value = float(text)
            if 0.001 <= value <= 5000:
                return text, 90.0  # Fixed confidence, see docstring.
            logging.warning("Detected weight out of range: %s", value)
        return "Not detected", 0.0
    except Exception as e:
        # Best-effort boundary: any OCR/imaging failure becomes "Not detected".
        logging.error("OCR error: %s", e)
        return "Not detected", 0.0