File size: 3,049 Bytes
c1b4f26
 
 
 
 
 
 
 
1d3d5c8
c1b4f26
1d3d5c8
 
 
 
 
 
 
 
c1b4f26
1d3d5c8
 
 
 
 
 
c1b4f26
1d3d5c8
 
 
 
 
 
 
c1b4f26
1d3d5c8
 
c1b4f26
1d3d5c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1b4f26
1d3d5c8
 
 
 
 
 
 
 
 
 
 
 
 
c1b4f26
1d3d5c8
 
 
 
 
 
 
c1b4f26
1d3d5c8
 
 
c1b4f26
1d3d5c8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from paddleocr import PaddleOCR
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor
from utils.config import Config
import requests
import numpy as np
from PIL import Image, ImageTransform


class OCRDetector:
    def __init__(self) -> None:
        self.paddle_ocr = PaddleOCR(
            lang="en",
            use_angle_cls=False,
            use_gpu=True if Config.device == "cpu" else False,
            show_log=False,
        )
        # config['weights'] = './weights/transformerocr.pth'

        vietocr_config = Cfg.load_config_from_name("vgg_transformer")
        vietocr_config["weights"] = Config.ocr_path
        vietocr_config["cnn"]["pretrained"] = False
        vietocr_config["device"] = Config.device
        vietocr_config["predictor"]["beamsearch"] = False
        self.viet_ocr = Predictor(vietocr_config)

    def find_box(self, image):
        """Xác định box dựa vào mô hình paddle_ocr"""
        result = self.paddle_ocr.ocr(image, cls=False, rec=False)
        result = result[0]
        # Extracting detected components
        boxes = result  # [res[0] for res in result]
        boxes = np.array(boxes).astype(int)

        # scores = [res[1][1] for res in result]
        return boxes

    def cut_image_polygon(self, image, box):
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = box
        w = x2 - x1
        h = y4 - y1
        scl = h // 7
        new_box = (
            [max(x1 - scl, 0), max(y1 - scl, 0)],
            [x2 + scl, y2 - scl],
            [x3 + scl, y3 + scl],
            [x4 - scl, y4 + scl],
        )
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = new_box
        # Define 8-tuple with x,y coordinates of top-left, bottom-left, bottom-right and top-right corners and apply
        transform = [x1, y1, x4, y4, x3, y3, x2, y2]
        result = image.transform((w, h), ImageTransform.QuadTransform(transform))
        return result

    def vietnamese_text(self, boxes, image):
        """Xác định text dựa vào mô hình viet_ocr"""
        results = []
        for box in boxes:
            try:
                cut_image = self.cut_image_polygon(image, box)
                # cut_image = Image.fromarray(np.uint8(cut_image))
                text, score = self.viet_ocr.predict(cut_image, return_prob=True)
                if score > Config.vietocr_threshold:
                    results.append({"text": text, "score": score, "box": box})
            except:
                continue
        return results

    # Merge
    def text_detector(self, image_path):
        if image_path.startswith("https://"):
            image = Image.open(requests.get(image_path, stream=True).raw).convert("RGB")
        else:
            image = Image.open(image_path).convert("RGB")
        # np_image = np.array(image)

        boxes = self.find_box(image_path)
        if not boxes.any():
            return None

        results = self.vietnamese_text(boxes, image)
        if results != []:
            return results
        else:
            return None