File size: 4,170 Bytes
b3722f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a9ef07
 
 
 
 
b3722f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from pathlib import Path
import cv2
import os
import json
from paddleocr.tools.infer.predict_system import TextSystem
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from typing import Union, Any, Tuple, List, Optional, Dict
from PIL import Image, ImageDraw, ImageFont
import math
import numpy as np
from const import default_config

def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     show_score=False,
                     drop_score=0.5,
                     font_path="./fonts/simfang.ttf"):
    """Render OCR detections next to the source image.

    The left half of the result is ``image`` blended 50/50 with a copy on
    which every kept box is filled with a pseudo-random colour. The right
    half is a white canvas of the same size showing each box outline and
    its recognised text.

    Args:
        image: Source picture; used through ``.width``, ``.height``,
            ``.copy()`` and ``Image.blend`` (assumed to be an RGB PIL image
            -- confirm against callers).
        boxes: Iterable of quadrilaterals. Each box is indexed as
            ``box[0..3][0..1]``, i.e. four ``(x, y)`` points. The edge from
            point 0 to point 1 is treated as the width and the edge from
            point 0 to point 3 as the height.
        txts: Recognised strings, parallel to ``boxes``.
        scores: Optional confidences, parallel to ``boxes``. When ``None``
            no entry is dropped and no score is appended to the text.
        show_score: Append ``":<score>"`` to each text when a score exists.
        drop_score: Entries whose score is below this value are skipped.
        font_path: TrueType font file used to draw the text.

    Returns:
        ``numpy.ndarray`` built from an RGB image twice as wide as ``image``.
    """
    import random

    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator yields the same colour sequence as
    # ``random.seed(0)`` did, without reseeding the process-wide RNG.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # Loading a TrueType file is comparatively costly; reuse one font object
    # per size instead of reloading it for every box.
    font_cache = {}

    def _font_for(size):
        font = font_cache.get(size)
        if font is None:
            font = ImageFont.truetype(font_path, size, encoding="utf-8")
            font_cache[size] = font
        return font

    # ``scores`` defaults to None; zipping None raised TypeError, so pair
    # every box with a None score in that case.
    if scores is None:
        entries = ((box, txt, None) for box, txt in zip(boxes, txts))
    else:
        entries = zip(boxes, txts, scores)

    for box, txt, score in entries:
        if score is not None and score < drop_score:
            continue
        color = (rng.randint(0, 255), rng.randint(0, 255),
                 rng.randint(0, 255))
        draw_left.polygon([(pt[0], pt[1]) for pt in box], fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        box_height = math.sqrt((box[0][0] - box[3][0])**2 +
                               (box[0][1] - box[3][1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2 +
                              (box[0][1] - box[1][1])**2)
        if show_score and score is not None:
            txt = txt + ':' + str(score)
        if box_height > 2 * box_width:
            # Tall, narrow box: draw the characters one below another.
            font = _font_for(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                char_bbox = font.getbbox(c)
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_bbox[3] - char_bbox[1]
        else:
            font = _font_for(max(int(box_height * 0.8), 10))
            draw_right.text(
                [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)

def draw_ocr_results(image_fp: Union[str, Path, Image.Image], ocr_outs, font_path):
    """Draw OCR output for one image and return the visualisation.

    Args:
        image_fp: Path of the image to load, or an already opened PIL image.
        ocr_outs: Iterable of entries indexed as ``entry[0]`` (the box) and
            ``entry[1][0]`` / ``entry[1][1]`` (text and score).
        font_path: TrueType font file forwarded to ``draw_ocr_box_txt``.

    Returns:
        Whatever ``draw_ocr_box_txt`` returns for the collected boxes, texts
        and scores, called with ``drop_score=0.0``.
    """
    # Credits: adapted from https://github.com/PaddlePaddle/PaddleOCR
    if isinstance(image_fp, (str, Path)):
        # Close the file handle deterministically; convert() returns an
        # independent in-memory copy.
        with Image.open(image_fp) as src:
            img = src.convert('RGB')
    elif image_fp.mode != 'RGB':
        # The drawing code fills polygons with RGB tuples and pastes into an
        # RGB canvas, so normalise other modes (e.g. 'L', 'P') up front.
        img = image_fp.convert('RGB')
    else:
        img = image_fp

    # Materialise once so a one-shot iterable can be read three times.
    entries = list(ocr_outs)
    boxes = [entry[0] for entry in entries]
    txts = [entry[1][0] for entry in entries]
    scores = [entry[1][1] for entry in entries]

    return draw_ocr_box_txt(
        img, boxes, txts, scores, drop_score=0.0, font_path=font_path
    )

class MyObject:
    """Expose the entries of a mapping as instance attributes."""

    def __init__(self, dictionary):
        # Copy each key/value pair onto the instance so it reads as obj.key.
        for name, value in dictionary.items():
            setattr(self, name, value)

def json_to_class_obj(args):
    """Wrap the mapping ``args`` in a ``MyObject`` and return it."""
    wrapped = MyObject(args)
    return wrapped

def start_ocr(params):
    """Run the PaddleOCR text system on one image or a directory of images.

    Args:
        params: Mapping of option overrides. It is merged on top of
            ``default_config``; the merged options must provide ``image_dir``
            (a file or directory path) and ``use_angle_cls``.

    Returns:
        A list with one entry per input path, in the order the paths were
        visited. Each entry is a list of ``[box_as_list, recognition]``
        pairs. An entry is empty when the image could not be read or when
        the text system reported no boxes.
    """
    args = json_to_class_obj({
        **default_config,
        **params,
    })

    if os.path.isdir(args.image_dir):
        image_paths = [
            os.path.join(args.image_dir, name)
            for name in os.listdir(args.image_dir)
        ]
    else:
        image_paths = [args.image_dir]

    text_system = TextSystem(args)

    results = []
    # Read images one at a time so only a single decoded image is held in
    # memory while OCR runs.
    for image_path in image_paths:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread yields None for unreadable or non-image files;
            # keep the result list aligned with the inputs.
            results.append([])
            continue

        outputs = text_system(img, args.use_angle_cls)
        # Only the first two outputs are used; anything after them (the
        # original discarded a third value) is ignored.
        dt_boxes, rec_res = outputs[0], outputs[1]
        if dt_boxes is None or rec_res is None:
            # NOTE(review): TextSystem appears to return None boxes when
            # nothing is detected -- confirm against the installed version.
            results.append([])
            continue

        results.append([[box.tolist(), rec]
                        for box, rec in zip(dt_boxes, rec_res)])
    return results