File size: 6,505 Bytes
9bf4bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple, Union

import numpy as np
from mmengine import track_parallel_progress
from scipy.io import loadmat

from mmocr.utils import is_type_list
from ..data_preparer import DATA_PARSERS
from .base import BaseParser


@DATA_PARSERS.register_module()
class SynthTextAnnParser(BaseParser):
    """SynthText Text Detection Annotation Parser.

    The annotations are read from a single ``.mat`` file. For every image,
    the character boxes are grouped word by word and each word is converted
    into one polygon that traces the outline of its characters.

    Note:
        ``separator``, ``ignore``, ``format``, ``encoding``, ``remove_strs``
        and ``mode`` are stored on the instance but are not read by any
        method defined in this class.

    Args:
        split (str): The split of the dataset. It is usually set automatically
            and users do not need to set it manually in config file in most
            cases.
        nproc (int): Number of processes to process the data. It is usually
            set automatically and users do not need to set it manually in
            config file in most cases.
        separator (str): The separator between each element in a line. Defaults
            to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Used to remove redundant strings in
            the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None.
    """

    def __init__(self,
                 split: str,
                 nproc: int,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        # Attributes are assigned before delegating to the base class, as in
        # the original ordering, in case the base initializer relies on them.
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(split=split, nproc=nproc)

    def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
        """Trace the boundary points of one text instance.

        The boundary is built by walking rows 0-1 of every char box in order,
        then rows 2-3 of every char box in reverse order, so that the points
        form one closed outline around the whole word.

        Args:
            char_boxes (list[ndarray]): The char boxes for one text. Each
                element is 4x2 ndarray.

        Returns:
            ndarray: The boundary point sets with size nx2, cast to int.
        """
        assert is_type_list(char_boxes, np.ndarray)

        # Top edge, from top left to top right: first two corners of each box.
        p_top = [box[0:2] for box in char_boxes]
        # Bottom edge, from bottom right to bottom left: last two corners of
        # each box, visiting the boxes in reverse order.
        p_bottom = [box[[2, 3], :] for box in reversed(char_boxes)]

        return np.concatenate(p_top + p_bottom).astype(int)

    def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
                             strs: np.ndarray
                             ) -> Tuple[List[np.ndarray], List[str]]:
        """Match the bboxes, char bboxes, and strs.

        Every entry of ``strs`` is split on whitespace into words. Char boxes
        are then consumed sequentially: the first ``len(words[0])`` char boxes
        belong to the first word, and so on. ``bboxes`` is only used to
        determine how many words are expected.

        Args:
            bboxes (ndarray): The text boxes of size (2, 4, num_box). A 2-D
                array is treated as a single box.
            char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
                A 2-D array is treated as a single char box.
            strs (ndarray): The string of size (num_strs,)

        Returns:
            Tuple(List[ndarray], List[str]): Polygon & word list. Both lists
            have one entry per word.

        Raises:
            AssertionError: If an argument is not an ndarray, or if the number
                of words in ``strs`` differs from the number of text boxes.
            IndexError: If the words contain more characters than there are
                char boxes.
        """
        assert isinstance(bboxes, np.ndarray)
        assert isinstance(char_bboxes, np.ndarray)
        assert isinstance(strs, np.ndarray)
        char_bboxes = char_bboxes.astype(np.int32)

        # Normalize to (num_char_box, 4, 2) so that indexing the first axis
        # yields one 4x2 char box.
        if char_bboxes.ndim == 2:
            char_bboxes = np.expand_dims(char_bboxes, axis=2)
        char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
        num_boxes = 1 if bboxes.ndim == 2 else bboxes.shape[-1]

        words = []
        for line in strs:
            words.extend(line.split())

        # A mismatch previously surfaced either as an opaque IndexError (too
        # many words) or as a bare assertion failure (too few words).
        assert len(words) == num_boxes, (
            f'Number of words ({len(words)}) does not match the number of '
            f'text boxes ({num_boxes})')

        poly_boundary_list = []
        start_inx = 0
        for word in words:
            end_inx = start_inx + len(word)
            # Index one char box at a time (rather than slicing) so that a
            # shortage of char boxes raises instead of being truncated.
            word_char_boxes = [
                char_bboxes[char_inx]
                for char_inx in range(start_inx, end_inx)
            ]
            poly_boundary_list.append(self._trace_boundary(word_char_boxes))
            start_inx = end_inx

        return poly_boundary_list, words

    def parse_files(self, img_paths: Union[List[str], str],
                    ann_paths: Union[List[str], str]) -> List[Tuple]:
        """Convert annotations to MMOCR format.

        Args:
            img_paths (str or list[str]): the list of image paths or the
                directory of the images. It is stored as ``self.img_dir`` and
                joined with each image name in :meth:`parse_file`, so in
                practice it is expected to be a directory path.
            ann_paths (str or list[str]): the list of annotation paths or the
                path of the annotation file which contains all the annotations.
                This parser requires a single path (str) to a ``.mat`` file.

        Returns:
            List[Tuple]: A list of a tuple of (image_path, instances).

            - img_path (str): The path of image file, which can be read
              directly by opencv.
            - instance: instance is a list of dict containing parsed
              annotations, which should contain the following keys:

              - 'poly' or 'box' (textdet or textspotting)
              - 'text' (textspotting or textrecog)
              - 'ignore' (all task)
        """
        assert isinstance(ann_paths, str)
        gt = loadmat(ann_paths)
        self.img_dir = img_paths
        # One task per image: (image name, word boxes, char boxes, texts).
        tasks = list(
            zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0],
                gt['txt'][0]))
        samples = track_parallel_progress(
            self.parse_file, tasks, nproc=self.nproc)
        return samples

    def parse_file(self, annotation: Tuple) -> Tuple:
        """Parse single annotation.

        Args:
            annotation (Tuple): A tuple of (image name, word boxes, char
                boxes, texts) for one image, as assembled in
                :meth:`parse_files`.

        Returns:
            Tuple: The image path (``self.img_dir`` joined with the image
            name) and a list of instance dicts with keys 'poly', 'text' and
            'ignore'.
        """
        img_file, word_bboxes, char_bboxes, txt = annotation
        polys_list, word_list = self._match_bbox_char_str(
            word_bboxes, char_bboxes, txt)

        instances = [
            dict(poly=poly.flatten().tolist(), text=word, ignore=False)
            for poly, word in zip(polys_list, word_list)
        ]
        return osp.join(self.img_dir, img_file[0]), instances