Spaces:
Sleeping
Sleeping
import pytesseract | |
import numpy as np | |
import cv2 | |
import re | |
import logging | |
from PIL import Image | |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
def preprocess_image(img):
    """Turn a BGR image into an inverted binary image for OCR.

    Steps, in order: grayscale conversion, 2x upscaling with linear
    interpolation, a 3x3 Gaussian blur, and an inverted Gaussian adaptive
    threshold (block size 11, constant 2, max value 255).

    Args:
        img: Image array in BGR channel order, as expected by
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded single-channel image produced by
        ``cv2.adaptiveThreshold``.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # dsize=None lets the fx/fy scale factors determine the output size.
    enlarged = cv2.resize(
        grayscale, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR
    )
    smoothed = cv2.GaussianBlur(enlarged, (3, 3), 0)
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
def extract_weight_from_image(pil_img):
    """Read a weight value from an image using Tesseract OCR.

    The image is normalised to 3-channel RGB, converted to BGR, run through
    ``preprocess_image`` and passed to ``pytesseract.image_to_string`` with a
    character whitelist of digits, ``.``, ``k`` and ``g``. Spaces and
    newlines are stripped from the OCR text and the first decimal number
    found is taken as the weight.

    Args:
        pil_img: Image to read. PIL images of any mode (grayscale, palette,
            RGBA, ...) are converted to RGB first. Any other input is handed
            to ``np.array`` unchanged -- presumably RGB pixel data; confirm
            against callers. ``None`` is treated as "no image supplied".

    Returns:
        A ``(text, confidence)`` tuple. ``(str(value), 90.0)`` when a number
        in the range (0, 5000] is found, otherwise ``("Not detected", 0.0)``.
        The 90.0 is a fixed placeholder, not a score reported by Tesseract.
        This function does not raise: any exception is logged and reported
        as ``("Not detected", 0.0)``.
    """
    if pil_img is None:
        logging.warning("OCR skipped: no image provided")
        return "Not detected", 0.0
    try:
        # cv2.COLOR_RGB2BGR rejects single-channel arrays, so grayscale or
        # palette PIL images must be expanded to RGB before conversion.
        if isinstance(pil_img, Image.Image):
            pil_img = pil_img.convert("RGB")
        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        processed = preprocess_image(img)
        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
        raw_text = pytesseract.image_to_string(processed, config=config)
        logging.info("OCR Raw Output: %s", raw_text)
        cleaned = raw_text.replace(" ", "").replace("\n", "")
        match = re.search(r"(\d+\.?\d*)", cleaned)
        if match:
            value = float(match.group(1))
            # Values outside (0, 5000] are treated as misreads.
            if 0 < value <= 5000:
                return str(value), 90.0
        return "Not detected", 0.0
    except Exception as e:
        # Boundary handler: keep the traceback in the log but never
        # propagate OCR failures to the caller.
        logging.exception("OCR error: %s", e)
        return "Not detected", 0.0