File size: 858 Bytes
6d93c4d
 
 
 
1a4ff0a
6d93c4d
7112c61
e9a7c89
7112c61
1a4ff0a
 
 
 
7112c61
1a4ff0a
 
7112c61
1a4ff0a
 
6d93c4d
7112c61
41933ea
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import os
import numpy as np
import pytesseract
from PIL import Image

def extract_weight_from_image(pil_img):
    """Read a numeric value from an image using OpenCV and Tesseract.

    The image is converted to grayscale, lightly blurred and binarized with
    Otsu's threshold, then passed to Tesseract with a whitelist restricted
    to digits and the decimal point.

    Args:
        pil_img: A PIL image (anything exposing ``convert("RGB")``).

    Returns:
        A ``(text, avg_conf)`` tuple. ``text`` is the concatenation of all
        recognized characters that are digits or ``.`` (possibly empty).
        ``avg_conf`` is the mean of the non-negative confidences reported
        by Tesseract, or ``0`` when none were reported.
    """
    # cv2 is imported lazily because it needs the system libGL library.
    # Only fall back to installing it when the import actually fails,
    # instead of shelling out to apt-get on every single call.
    try:
        import cv2
    except ImportError:
        os.system("apt-get update && apt-get install -y libgl1-mesa-glx")
        import cv2

    image = np.array(pil_img.convert("RGB"))
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    allowed = '0123456789.'
    # --psm 7: treat the image as a single line of text.
    config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
    data = pytesseract.image_to_data(
        thresh, config=config, output_type=pytesseract.Output.DICT
    )

    # image_to_data may return an empty dict when Tesseract emits no rows,
    # so use .get() rather than indexing.
    words = data.get('text', [])
    extracted_text = ''.join(ch for ch in ''.join(words) if ch in allowed)

    # Depending on the pytesseract/Tesseract versions, confidences arrive as
    # ints, floats or numeric strings. Parse them uniformly and skip negative
    # values (the original digit-only filter also rejected "-1").
    confidences = []
    for raw_conf in data.get('conf', []):
        try:
            value = float(raw_conf)
        except (TypeError, ValueError):
            continue
        if value >= 0:
            confidences.append(value)

    avg_conf = sum(confidences) / len(confidences) if confidences else 0

    return extracted_text, avg_conf