Spaces:
Sleeping
Sleeping
from paddleocr import PaddleOCR | |
from vietocr.tool.config import Cfg | |
from vietocr.tool.predictor import Predictor | |
from utils.config import Config | |
import requests | |
import numpy as np | |
from PIL import Image, ImageTransform | |
class OCRDetector: | |
def __init__(self) -> None: | |
self.paddle_ocr = PaddleOCR( | |
lang="en", | |
use_angle_cls=False, | |
use_gpu=True if Config.device == "cpu" else False, | |
show_log=False, | |
) | |
# config['weights'] = './weights/transformerocr.pth' | |
vietocr_config = Cfg.load_config_from_name("vgg_transformer") | |
vietocr_config["weights"] = Config.ocr_path | |
vietocr_config["cnn"]["pretrained"] = False | |
vietocr_config["device"] = Config.device | |
vietocr_config["predictor"]["beamsearch"] = False | |
self.viet_ocr = Predictor(vietocr_config) | |
def find_box(self, image): | |
"""Xác định box dựa vào mô hình paddle_ocr""" | |
result = self.paddle_ocr.ocr(image, cls=False, rec=False) | |
result = result[0] | |
# Extracting detected components | |
boxes = result # [res[0] for res in result] | |
boxes = np.array(boxes).astype(int) | |
# scores = [res[1][1] for res in result] | |
return boxes | |
def cut_image_polygon(self, image, box): | |
(x1, y1), (x2, y2), (x3, y3), (x4, y4) = box | |
w = x2 - x1 | |
h = y4 - y1 | |
scl = h // 7 | |
new_box = ( | |
[max(x1 - scl, 0), max(y1 - scl, 0)], | |
[x2 + scl, y2 - scl], | |
[x3 + scl, y3 + scl], | |
[x4 - scl, y4 + scl], | |
) | |
(x1, y1), (x2, y2), (x3, y3), (x4, y4) = new_box | |
# Define 8-tuple with x,y coordinates of top-left, bottom-left, bottom-right and top-right corners and apply | |
transform = [x1, y1, x4, y4, x3, y3, x2, y2] | |
result = image.transform((w, h), ImageTransform.QuadTransform(transform)) | |
return result | |
def vietnamese_text(self, boxes, image): | |
"""Xác định text dựa vào mô hình viet_ocr""" | |
results = [] | |
for box in boxes: | |
try: | |
cut_image = self.cut_image_polygon(image, box) | |
# cut_image = Image.fromarray(np.uint8(cut_image)) | |
text, score = self.viet_ocr.predict(cut_image, return_prob=True) | |
if score > Config.vietocr_threshold: | |
results.append({"text": text, "score": score, "box": box}) | |
except: | |
continue | |
return results | |
# Merge | |
def text_detector(self, image_path): | |
if image_path.startswith("https://"): | |
image = Image.open(requests.get(image_path, stream=True).raw).convert("RGB") | |
else: | |
image = Image.open(image_path).convert("RGB") | |
# np_image = np.array(image) | |
boxes = self.find_box(image_path) | |
if not boxes.any(): | |
return None | |
results = self.vietnamese_text(boxes, image) | |
if results != []: | |
return results | |
else: | |
return None | |