Spaces:
Running
Running
File size: 1,299 Bytes
5b73d24 65ed4c1 5b73d24 8fe1b94 65ed4c1 363a646 65ed4c1 8fe1b94 363a646 65ed4c1 701d11a 363a646 701d11a 8fe1b94 701d11a 8fe1b94 65ed4c1 8fe1b94 4a07e0e 8fe1b94 65ed4c1 8fe1b94 65ed4c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import pytesseract
import numpy as np
import re
import cv2
from PIL import Image
# First plain decimal number in the OCR output: one or more digits with an
# optional fractional part. Matching from the first digit keeps the integer
# part attached to its decimals, so "5.25" is read whole (never as "25") and
# no dangling "." is ever returned.
_WEIGHT_PATTERN = re.compile(r"\d+(?:\.\d+)?")

# Confidence reported for every successful read. This is a fixed placeholder,
# not a score produced by Tesseract.
_ASSUMED_CONFIDENCE = 95.0


def extract_weight_from_image(pil_img):
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to grayscale, upscaled 2x, blurred, and binarised
    with an inverted adaptive threshold before being passed to Tesseract,
    which is restricted to the characters ``0-9`` and ``.``.

    Args:
        pil_img: The image to read. Objects exposing a PIL-style
            ``convert()`` method are normalised to RGB first, so grayscale
            or palette images are accepted; anything else is handed to
            ``np.array`` unchanged and must already be an RGB-like array.

    Returns:
        A ``(text, confidence)`` tuple:

        * ``(number, 95.0)`` when a number is found, where ``number`` is the
          first decimal number in the OCR output, returned as a string. No
          range validation is applied to it.
        * ``("No weight detected", 0.0)`` when the OCR output has no digits.
        * ``("Error: <message>", 0.0)`` when any step raises.

    This function does not raise; failures are reported through the returned
    text so a UI caller can display them directly.
    """
    try:
        # cv2.COLOR_RGB2GRAY below needs a colour image; a grayscale or
        # palette PIL image would otherwise become a 2-D array and fail.
        if hasattr(pil_img, "convert"):
            pil_img = pil_img.convert("RGB")
        img = np.array(pil_img)

        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # Upscale before OCR; small digits are recognised less reliably.
        resized = cv2.resize(
            gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC
        )

        # Smooth noise so the adaptive threshold does not pick up speckles.
        blurred = cv2.GaussianBlur(resized, (5, 5), 0)

        # Inverted binary output, thresholded against the local mean over
        # an 11x11 neighbourhood with an offset of 2.
        thresh = cv2.adaptiveThreshold(
            blurred,
            255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )

        # --psm 7 asks Tesseract to treat the image as a single text line;
        # the whitelist limits the recognised characters to digits and ".".
        config = "--psm 7 -c tessedit_char_whitelist=0123456789."
        ocr_text = pytesseract.image_to_string(thresh, config=config)
        print("OCR Text:", ocr_text)

        match = _WEIGHT_PATTERN.search(ocr_text)
        if match is None:
            return "No weight detected", 0.0
        return match.group(), _ASSUMED_CONFIDENCE
    except Exception as e:
        # Deliberate catch-all boundary: callers receive the failure as text
        # instead of an exception.
        return f"Error: {e}", 0.0
|