File size: 4,875 Bytes
9bf4bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple

from mmocr.registry import DATA_PARSERS
from mmocr.utils import bbox2poly
from .base import BaseParser


@DATA_PARSERS.register_module()
class ICDARTxtTextDetAnnParser(BaseParser):
    """ICDAR Txt Format Text Detection Annotation Parser.

    The original annotation format of this dataset is stored in txt files,
    which is formed as the following format:
        x1, y1, x2, y2, x3, y3, x4, y4, transcription

    Args:
        separator (str): The separator between each element in a line. Defaults
            to ','.
        ignore (str): The text to be ignored. Instances whose transcription
            equals this value are kept but flagged with ``ignore=True``.
            Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Substrings removed from every field
            of an annotation line (the coordinates as well as the
            transcription) before the line is interpreted. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. When None, the coordinate fields are used
            directly as the polygon. Defaults to None.
        **kwargs: Forwarded unchanged to ``BaseParser.__init__``.
            NOTE(review): options such as ``nproc`` are presumably handled
            there -- confirm against ``BaseParser``.
    """

    def __init__(self,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None,
                 **kwargs) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(**kwargs)

    def parse_file(self, img_path: str, ann_path: str) -> Tuple:
        """Parse single annotation.

        Args:
            img_path (str): Path of the image; returned unchanged.
            ann_path (str): Path of the txt annotation file to read.

        Returns:
            Tuple: ``(img_path, instances)`` where ``instances`` is a list of
            dicts with the keys ``poly`` (list of numbers), ``text`` (str)
            and ``ignore`` (bool).
        """
        instances = []
        for anno in self.loader(ann_path, self.sep, self.format,
                                self.encoding):
            fields = list(anno.values())
            if self.remove_strs is not None:
                # Strip each redundant substring from all fields, not only
                # from the transcription.
                for redundant in self.remove_strs:
                    fields = [
                        field.replace(redundant, '') for field in fields
                    ]
            # Everything except the last field is a coordinate; the last
            # field is the transcription.
            poly = list(map(float, fields[0:-1]))
            if self.mode is not None:
                # Convert the box description into polygon points.
                poly = bbox2poly(poly, self.mode)
                poly = poly.tolist()
            text = fields[-1]
            instances.append(
                dict(poly=poly, text=text, ignore=(text == self.ignore)))

        return img_path, instances


@DATA_PARSERS.register_module()
class ICDARTxtTextRecogAnnParser(BaseParser):
    """ICDAR Txt Format Text Recognition Annotation Parser.

    The original annotation format of this dataset is stored in txt files,
    which is formed as the following format:
        img_path, transcription

    Args:
        separator (str): The separator between each element in a line. Defaults
            to ','.
        ignore (str): The text to be ignored. Samples whose cleaned
            transcription equals this value are dropped. Defaults to '#'.
        format (str): The format of the annotation. Defaults to 'img,text'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Used to remove redundant strings in
            the transcription. Pass None to disable. Defaults to ['"'].
        **kwargs: Forwarded unchanged to ``BaseParser.__init__``.
            NOTE(review): options such as ``nproc`` are presumably handled
            there -- confirm against ``BaseParser``.

    Note:
        This class neither declares nor reads a ``base_name`` option; image
        names are joined to ``img_dir`` exactly as they appear in the
        annotation file.
    """

    def __init__(self,
                 separator: str = ',',
                 ignore: str = '#',
                 format: str = 'img,text',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = ['"'],
                 **kwargs) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        # Store a private copy: the default list object is shared by every
        # call, so aliasing it would let a mutation on one instance leak into
        # all later instances (and into the caller's own list).
        self.remove_strs = (None
                            if remove_strs is None else list(remove_strs))
        super().__init__(**kwargs)

    def parse_files(self, img_dir: str, ann_path: str) -> List:
        """Parse annotations.

        Args:
            img_dir (str): Directory that is prepended to every image name.
            ann_path (str): Path of the txt annotation file to read.

        Returns:
            List: ``(image_path, text)`` tuples, one per annotation line that
            is not ignored.
        """
        # Kept as an assert so callers continue to see AssertionError.
        assert isinstance(ann_path, str)
        samples = []
        for anno in self.loader(
                file_path=ann_path,
                format=self.format,
                encoding=self.encoding,
                separator=self.sep):
            text = anno['text'].strip()
            if self.remove_strs is not None:
                for redundant in self.remove_strs:
                    text = text.replace(redundant, '')
            # The ignore marker is compared after cleanup, so a marker wrapped
            # in removable characters (e.g. quotes) is still recognised.
            if text == self.ignore:
                continue
            samples.append((osp.join(img_dir, anno['img']), text))

        return samples