File size: 4,696 Bytes
29f689c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import numpy as np
import cv2
from .ar_label_encode import ARLabelEncode


def crop_safe(arr, rect, bbs=None, pad=0):
    """Crop ``arr`` to a padded rectangle, clamped to the array bounds.

    Args:
        arr: Array with at least two dimensions. ``rect`` is applied to its
            first two axes; any trailing axes are kept whole.
        rect: Four numbers ``(start0, start1, size0, size1)`` giving the
            offset and extent of the crop along axis 0 and axis 1 of ``arr``.
            The input is copied, never modified.
        bbs: Optional boxes, indexable as ``bbs[i, 0]`` / ``bbs[i, 1]``, whose
            first two columns are offsets in the same frame as ``rect``.
            A NumPy array is shifted in place; any other sequence is
            converted to a new array first.
        pad: Margin added on every side of ``rect`` before clamping.

    Returns:
        The cropped array when ``bbs`` is empty or omitted, otherwise the
        tuple ``(cropped, bbs)`` with the first two columns of ``bbs``
        re-expressed relative to the crop origin.
    """
    rect = np.array(rect)
    rect[:2] -= pad
    rect[2:] += 2 * pad

    start0, start1 = max(0, rect[0]), max(0, rect[1])
    # Clamp the stop index from below as well: a rectangle lying entirely
    # before the array origin would otherwise yield a negative stop, which
    # NumPy interprets as "counted from the end" and silently returns the
    # wrong region instead of an empty crop.
    stop0 = max(start0, min(arr.shape[0], rect[0] + rect[2]))
    stop1 = max(start1, min(arr.shape[1], rect[1] + rect[3]))
    arr = arr[start0:stop0, start1:stop1, ...]

    if bbs is None or len(bbs) == 0:
        return arr

    bbs = np.asarray(bbs)
    # Assign rather than use ``-=`` so the result is cast to the dtype of
    # ``bbs`` exactly as a per-element update would be.
    bbs[:, 0] = bbs[:, 0] - start0
    bbs[:, 1] = bbs[:, 1] - start1
    return arr, bbs


try:
    # pygame==2.5.2
    import pygame
    from pygame import freetype
except ImportError:
    # pygame is an optional dependency that is only needed once text is
    # actually rendered. Catch ImportError only (a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit) and bind the names so that they
    # always exist at module level.
    pygame = None
    freetype = None


class CAMLabelEncode(ARLabelEncode):
    """Encode a text label and attach a per-character-class glyph mask.

    On top of the encoding done by ``ARLabelEncode``, the label text is
    rendered with a pygame freetype font and stored in ``data['binary_mask']``
    as a mask with one channel per character class.

    Args:
        max_text_length: Forwarded to ``ARLabelEncode``.
        character_dict_path: Forwarded to ``ARLabelEncode``.
        use_space_char: Forwarded to ``ARLabelEncode``.
        font_path: Path of the font file used for rendering. Required for
            ``__call__``; when ``None`` no font is loaded.
        font_size: Size assigned to the freetype font.
        font_strength: Strength of the "strong" (emboldened) font style.
        image_shape: Two-element shape from which the mask size is derived
            (each entry is halved). Defaults to ``[32, 128]``.
        **kwargs: Accepted for configuration compatibility and ignored.
    """

    def __init__(self,
                 max_text_length,
                 character_dict_path=None,
                 use_space_char=False,
                 font_path=None,
                 font_size=30,
                 font_strength=0.1,
                 image_shape=None,
                 **kwargs):
        super().__init__(max_text_length, character_dict_path, use_space_char)
        # A fresh list per instance avoids sharing one mutable default.
        self.image_shape = [32, 128] if image_shape is None else image_shape

        # Always define the attribute so a missing font is reported clearly
        # in __call__ instead of surfacing as an AttributeError.
        self.font = None
        if font_path is not None:
            freetype.init()
            font = freetype.Font(font_path)
            font.antialiased = True
            # Drawing positions refer to the text origin, not the top-left
            # corner of the glyph box.
            font.origin = True

            # Font style: fixed size, emboldened, neither underlined nor
            # slanted.
            font.size = font_size
            font.underline = False
            font.strong = True
            font.strength = font_strength
            font.oblique = False
            self.font = font

    def render_normal(self, font, text):
        """Render ``text`` character by character and return mask and boxes.

        Args:
            font: A configured ``pygame.freetype.Font``.
            text: Text to render; ``'\\n'`` starts a new line and whitespace
                only advances the pen without producing a box.

        Returns:
            ``(mask, bbs)`` where ``mask`` is the alpha channel of the
            rendered text cropped to the union of all character boxes plus
            5 pixels of padding, with its first two axes swapped, and ``bbs``
            is an array holding one ``(x, y, w, h)`` box per rendered
            character, relative to the crop.

        Raises:
            IndexError: If ``text`` contains no non-whitespace character,
                since there is then no box to crop to.
        """
        lines = text.split('\n')
        line_spacing = font.get_sized_height() + 1

        # Size the surface from the longest line, with generous head-room
        # for the extra spacing inserted between characters below.
        line_bounds = font.get_rect(max(lines, key=len))
        fsize = (round(2.0 * line_bounds.width),
                 round(1.25 * line_spacing * len(lines)))
        surf = pygame.Surface(fsize, pygame.locals.SRCALPHA, 32)

        bbs = []
        # The width of 'O' is used as the advance for whitespace.
        space = font.get_rect('O')
        y = 0
        for line in lines:
            x = 2  # carriage-return
            y += line_spacing  # line-feed
            for ch in line:
                if ch.isspace():
                    x += space.width
                    continue
                ch_bounds = font.render_to(surf, (x, y), ch)
                x += ch_bounds.width + 5
                bbs.append(np.array(ch_bounds))

        # Union of all character boxes: the region worth keeping.
        rect_union = pygame.Rect(bbs[0]).unionall(bbs)

        surf_arr, bbs = crop_safe(pygame.surfarray.pixels_alpha(surf),
                                  rect_union,
                                  np.array(bbs),
                                  pad=5)
        return surf_arr.swapaxes(0, 1), bbs

    def __call__(self, data):
        """Encode ``data`` and add the class-aware mask as ``'binary_mask'``.

        Returns ``None`` when the parent encoder rejects the sample,
        otherwise ``data`` with a float32 ``'binary_mask'`` entry whose
        values are scaled so that the maximum is 1 (all zeros if nothing
        was drawn).

        Raises:
            ValueError: If the encoder was created without ``font_path``.
        """
        data = super().__call__(data=data)
        if data is None:
            return None
        if self.font is None:
            raise ValueError(
                'CAMLabelEncode needs font_path to render the binary mask')

        char_ids = data['label'][1:data['length'] + 1]
        word = ''.join(self.character[c] for c in char_ids)
        glyph_mask, bbs = self.render_normal(self.font, word)

        cate_aware_surf = np.zeros(
            (glyph_mask.shape[0], glyph_mask.shape[1],
             len(self.character) - 3),
            dtype=np.uint8)
        # render_normal emits no box for whitespace, so whitespace ids must
        # be skipped here too or every later box lands in the wrong channel.
        visible_ids = [c for c in char_ids if not self.character[c].isspace()]
        for label_id, bb in zip(visible_ids, bbs):
            char_id = label_id - 1
            rows = slice(bb[1], bb[1] + bb[3])
            cols = slice(bb[0], bb[0] + bb[2])
            cate_aware_surf[rows, cols, char_id] = glyph_mask[rows, cols]

        # NOTE(review): cv2.resize takes its size as (width, height); confirm
        # that the ordering of image_shape matches what consumers expect.
        binary_mask = cv2.resize(
            cate_aware_surf,
            (self.image_shape[0] // 2, self.image_shape[1] // 2))
        peak = np.max(binary_mask)
        if peak > 0:
            binary_mask = binary_mask / peak  # [0 ~ 1]
        # Convert unconditionally so an all-zero mask has the same dtype as
        # every other sample.
        data['binary_mask'] = binary_mask.astype(np.float32)
        return data