|
import os |
|
import numpy as np |
|
import pytesseract |
|
from PIL import Image |
|
|
|
def _load_cv2():
    """Import and return the ``cv2`` module, installing libGL only on failure.

    The system package install is a best-effort fallback: it runs only when
    the first ``import cv2`` raises ``ImportError`` (presumably because the
    libGL shared library is missing on the host -- TODO confirm for the
    deployment image). If the retry import also fails, the ``ImportError``
    propagates to the caller.
    """
    try:
        import cv2
    except ImportError:
        # Same command the function used to run on every call; now executed
        # only when OpenCV cannot be imported, so normal calls stay fast and
        # side-effect free.
        os.system("apt-get update && apt-get install -y libgl1-mesa-glx")
        import cv2
    return cv2


def _mean_confidence(raw_confs):
    """Return the mean of the non-negative numeric entries in *raw_confs*.

    Entries may be ints, floats or numeric strings. Negative values and
    entries that cannot be parsed as numbers are ignored. Returns 0 when no
    usable entry remains.
    """
    scores = []
    for raw in raw_confs:
        try:
            score = float(raw)
        except (TypeError, ValueError):
            continue
        # Negative confidences (e.g. -1) are skipped, matching the original
        # digit-only filter which never accepted a leading minus sign.
        if score >= 0:
            scores.append(score)
    return sum(scores) / len(scores) if scores else 0


def extract_weight_from_image(pil_img):
    """Run OCR on an image and return the digits found plus a confidence.

    The image is converted to grayscale, lightly blurred and binarised with
    Otsu's threshold before being passed to Tesseract, which is restricted to
    the characters ``0123456789.`` and page segmentation mode 7.

    Args:
        pil_img: Image object exposing ``convert("RGB")`` (e.g. a PIL image).

    Returns:
        A ``(text, avg_conf)`` tuple. ``text`` contains only digits and ``.``
        (possibly empty). ``avg_conf`` is the mean of the non-negative
        confidences reported by Tesseract, or 0 if there are none.

    Raises:
        ImportError: If OpenCV cannot be imported even after the fallback
            install attempt.
    """
    cv2 = _load_cv2()

    # Pre-process: RGB -> gray -> 3x3 Gaussian blur -> Otsu binarisation.
    rgb = np.array(pil_img.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
    data = pytesseract.image_to_data(
        thresh, config=config, output_type=pytesseract.Output.DICT
    )

    # Keep only whitelisted characters in case the engine ignores the
    # whitelist; the result therefore never contains whitespace.
    allowed = '0123456789.'
    extracted_text = ''.join(ch for ch in ''.join(data['text']) if ch in allowed)

    return extracted_text, _mean_confidence(data['conf'])