Spaces:
Sleeping
Sleeping
File size: 1,327 Bytes
e58b1c2 975f9c6 5234a64 e58b1c2 5234a64 9ac49a2 5234a64 570a997 0f29b7c 570a997 ded0d50 9ac49a2 570a997 6ae35d6 301eb4d b613b80 570a997 301eb4d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image
# Configure the root logger at import time: INFO level and above, with each
# record rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def preprocess_image(img):
    """Turn a BGR image into an inverted binary image for OCR.

    Steps, in order: grayscale conversion, 2x upscaling with linear
    interpolation, a 3x3 Gaussian blur, and a Gaussian adaptive threshold
    (block size 11, constant 2) with inverted binary output.

    Args:
        img: Image array in BGR channel order, as accepted by
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded single-channel image.
    """
    # Drop colour information; the later steps work on one channel.
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Double the size on both axes (dsize=None lets fx/fy decide).
    upscaled = cv2.resize(mono, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    # Light smoothing before thresholding.
    smoothed = cv2.GaussianBlur(upscaled, (3, 3), 0)

    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
def extract_weight_from_image(pil_img):
    """Read a weight value from an image using Tesseract OCR.

    The image is converted to a BGR array, passed through
    ``preprocess_image`` and handed to Tesseract with a character whitelist
    of digits, ``.``, ``k`` and ``g``. Spaces and newlines are stripped from
    the OCR text, and the first number found is accepted when it satisfies
    ``0 < value <= 5000``.

    Args:
        pil_img: The image to read. Expected to be a PIL image; objects
            without a ``convert`` method are passed to ``np.array`` as-is
            and must already be laid out as an RGB array.

    Returns:
        A ``(text, confidence)`` tuple. On success this is
        ``(str(value), 90.0)``; otherwise ``("Not detected", 0.0)``.
        The 90.0 is a fixed placeholder, not a score reported by Tesseract.
        Errors are logged and never propagated to the caller.
    """
    if pil_img is None:
        logging.warning("OCR skipped: no image supplied")
        return "Not detected", 0.0

    try:
        # Grayscale, palette and 1-bit PIL images become 2-D arrays, which
        # cv2.cvtColor(..., COLOR_RGB2BGR) rejects. Normalise to 3-channel
        # RGB first so those images are processed instead of failing.
        if hasattr(pil_img, "convert"):
            pil_img = pil_img.convert("RGB")

        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        processed = preprocess_image(img)

        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
        raw_text = pytesseract.image_to_string(processed, config=config)
        logging.info("OCR Raw Output: %s", raw_text)

        # Join everything Tesseract returned into one token before matching.
        cleaned = raw_text.replace(" ", "").replace("\n", "")
        match = re.search(r"(\d+\.?\d*)", cleaned)
        if match:
            value = float(match.group(1))
            # Reject zero and implausibly large readings.
            if 0 < value <= 5000:
                return str(value), 90.0
        return "Not detected", 0.0
    except Exception as e:
        # Best-effort boundary: callers always get a result tuple. Log with
        # the traceback so OCR failures can be diagnosed.
        logging.exception("OCR error: %s", e)
        return "Not detected", 0.0
|