Spaces:
Sleeping
Sleeping
File size: 2,119 Bytes
e58b1c2 975f9c6 5234a64 e58b1c2 5234a64 b18d0cd 9ac49a2 5234a64 b18d0cd 0f29b7c d373620 b18d0cd ded0d50 9ac49a2 b18d0cd 9ac49a2 3137c41 b18d0cd b613b80 b18d0cd 301eb4d b18d0cd 301eb4d b18d0cd 301eb4d 6ae35d6 301eb4d b613b80 b18d0cd 301eb4d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image
# Configure root logging at INFO level; each record is emitted as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def preprocess_strong(img):
    """Prepare a BGR image for OCR: grayscale, 2x upscale, CLAHE, sharpen.

    Args:
        img: Image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``).

    Returns:
        The single-channel result of the final sharpening filter, twice the
        width and height of the input.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Double both dimensions; note the target size is given as (width, height).
    height, width = grayscale.shape
    upscaled = cv2.resize(
        grayscale,
        (2 * width, 2 * height),
        interpolation=cv2.INTER_CUBIC,
    )

    # Contrast-limited adaptive histogram equalisation on an 8x8 tile grid.
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(upscaled)

    # 3x3 sharpening kernel: centre weight 5, the four direct neighbours -1.
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    # ddepth=-1 keeps the output depth equal to the input depth.
    return cv2.filter2D(contrasted, -1, sharpen_kernel)
def _parse_weight_text(raw_text):
    """Return the number encoded in OCR output as a float, or None if absent.

    Everything except digits and dots is discarded (so digits from separate
    lines are joined). When several dots remain, only the last one is kept as
    the decimal point, and a leading dot is given a "0" prefix.
    """
    cleaned = re.sub(r"[^\d.]", "", raw_text)

    dot_count = cleaned.count('.')
    if dot_count > 1:
        # Drop every dot except the last one.
        cleaned = cleaned.replace('.', '', dot_count - 1)
    if cleaned.startswith('.'):
        cleaned = '0' + cleaned

    if not cleaned or not re.fullmatch(r"\d*\.?\d*", cleaned):
        return None
    return float(cleaned)


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image using several Tesseract configs.

    The image is converted to BGR, run through ``preprocess_strong`` and then
    OCR'd with each config in turn (digits-and-dot whitelist, page
    segmentation mode 6 first, then 11). The first config whose output parses
    to a value within [0.001, 5000] wins.

    Args:
        pil_img: The image to read. Objects exposing ``convert`` (PIL images)
            are normalised to RGB first; anything else is handed to
            ``np.array`` unchanged.

    Returns:
        A ``(text, confidence)`` tuple: the value rounded to two decimals as a
        string together with a hard-coded confidence of 90.0 on success, or
        ``("Not detected", 0.0)`` when no config yields a usable number or any
        step raises.
    """
    try:
        # Grayscale, palette and 1-bit images produce 2-D arrays that the
        # RGB->BGR conversion below rejects (and CMYK would be misread as
        # RGBA), so force a 3-channel RGB image before building the array.
        if hasattr(pil_img, "convert"):
            pil_img = pil_img.convert("RGB")

        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        processed = preprocess_strong(img)

        # OCR configs, tried in order.
        configs = (
            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',
            r'--oem 3 --psm 11 -c tessedit_char_whitelist=0123456789.',
        )
        for config in configs:
            raw_text = pytesseract.image_to_string(processed, config=config)
            logging.info("[Tesseract Output %s] Raw text: %s", config, raw_text)

            value = _parse_weight_text(raw_text)
            if value is not None and 0.001 <= value <= 5000:
                return str(round(value, 2)), 90.0

        return "Not detected", 0.0
    except Exception as e:
        # Deliberate best-effort boundary: any failure is logged and reported
        # to the caller as "Not detected" rather than propagated.
        logging.error(f"OCR failed: {e}")
        return "Not detected", 0.0
|