AutoWeightLogger2 / ocr_engine.py
Sanjayraju30's picture
Update ocr_engine.py
570a997 verified
raw
history blame
1.33 kB
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image
# Configure the root logger as a side effect of importing this module:
# INFO level and above, each record prefixed with timestamp and level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def preprocess_image(img):
    """Turn a colour frame into an enlarged, inverted binary image for OCR.

    Steps, in order: grayscale conversion, 2x upscaling, light Gaussian
    smoothing, then inverted adaptive thresholding.

    Args:
        img: Image array passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``,
            i.e. it is treated as being in BGR channel order.

    Returns:
        The thresholded single-channel image whose pixels are 0 or 255.
    """
    # Drop colour information; the later stages work on one channel.
    mono = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Double both dimensions (dsize=None, so the scale factors decide the size).
    enlarged = cv2.resize(mono, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)

    # 3x3 Gaussian blur; sigma 0 lets OpenCV derive it from the kernel size.
    smoothed = cv2.GaussianBlur(enlarged, (3, 3), 0)

    # Gaussian-weighted local threshold over an 11-pixel neighbourhood with
    # offset 2. THRESH_BINARY_INV maps pixels above the local threshold to 0
    # and all others to 255.
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
def extract_weight_from_image(pil_img):
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to a BGR array, run through ``preprocess_image``,
    and passed to ``pytesseract.image_to_string`` with a character whitelist
    of digits, ``.``, ``k`` and ``g``. The first number found in the OCR text
    (after removing spaces and newlines) is taken as the reading; unit letters
    are not interpreted.

    Args:
        pil_img: Image to read. An object exposing ``convert`` (such as a PIL
            image) is first normalised to RGB, so grayscale, palette and
            1-bit images are handled. Any other input is passed to
            ``np.array`` unchanged and is treated as RGB channel order.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is ``str()`` of
        the parsed float (so ``"75"`` is reported as ``"75.0"``) and
        ``confidence`` is the fixed constant ``90.0``; it is not computed from
        the OCR result. If no number in the range ``0 < value <= 5000`` is
        found, or if any step raises, ``("Not detected", 0.0)`` is returned.
        Exceptions are logged, never propagated.
    """
    try:
        source = pil_img
        # Non-RGB PIL modes ("L", "P", "1", ...) produce arrays that are not
        # 3-channel, which makes the RGB->BGR conversion below raise and the
        # image get reported as unreadable. Normalise them up front. The
        # hasattr guard keeps array-like inputs working as before.
        if hasattr(source, "convert"):
            source = source.convert("RGB")

        img = np.array(source)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # OpenCV expects BGR order
        processed = preprocess_image(img)

        # --oem 3: default OCR engine; --psm 6: treat the image as a single
        # uniform block of text; whitelist restricts recognised characters.
        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
        raw_text = pytesseract.image_to_string(processed, config=config)
        logging.info("OCR Raw Output: %s", raw_text)

        # Remove spaces and newlines so digits split by OCR are rejoined
        # before searching for the first integer or decimal number.
        cleaned = raw_text.replace(" ", "").replace("\n", "")
        match = re.search(r"(\d+\.?\d*)", cleaned)
        if match:
            value = float(match.group(1))
            # Reject zero and implausibly large readings.
            if 0 < value <= 5000:
                return str(value), 90.0
        return "Not detected", 0.0
    except Exception as e:
        # Best-effort boundary: any OCR/imaging failure becomes "Not detected".
        logging.error("OCR error: %s", e)
        return "Not detected", 0.0