Mountchicken's picture
Upload 704 files
9bf4bd7
raw
history blame
6.51 kB
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import List, Optional, Tuple, Union
import numpy as np
from mmengine import track_parallel_progress
from scipy.io import loadmat
from mmocr.utils import is_type_list
from ..data_preparer import DATA_PARSERS
from .base import BaseParser
@DATA_PARSERS.register_module()
class SynthTextAnnParser(BaseParser):
    """SynthText Text Detection Annotation Parser.

    The annotations are read from a single MATLAB ``.mat`` file (see
    :meth:`parse_files`). For every image, the character boxes are grouped
    word by word and the outline of each word is traced from the boxes of
    its characters.

    Args:
        split (str): The split of the dataset. It is usually set automatically
            and users do not need to set it manually in config file in most
            cases.
        nproc (int): Number of processes to process the data. It is usually
            set automatically and users do not need to set it manually in
            config file in most cases.
        separator (str): The separator between each element in a line.
            Defaults to ','.
        ignore (str): The text to be ignored. Defaults to '###'.
        format (str): The format of the annotation. Defaults to
            'x1,y1,x2,y2,x3,y3,x4,y4,trans'.
        encoding (str): The encoding of the annotation file. Defaults to
            'utf-8'.
        remove_strs (List[str], Optional): Used to remove redundant strings in
            the transcription. Defaults to None.
        mode (str, optional): The mode of the box converter. Supported modes
            are 'xywh' and 'xyxy'. Defaults to None.

    Note:
        ``separator``, ``ignore``, ``format``, ``encoding``, ``remove_strs``
        and ``mode`` are stored on the instance but are not read by any of
        the methods defined in this class.
    """

    def __init__(self,
                 split: str,
                 nproc: int,
                 separator: str = ',',
                 ignore: str = '###',
                 format: str = 'x1,y1,x2,y2,x3,y3,x4,y4,trans',
                 encoding: str = 'utf-8',
                 remove_strs: Optional[List[str]] = None,
                 mode: Optional[str] = None) -> None:
        self.sep = separator
        self.format = format
        self.encoding = encoding
        self.ignore = ignore
        self.mode = mode
        self.remove_strs = remove_strs
        super().__init__(split=split, nproc=nproc)

    def _trace_boundary(self, char_boxes: List[np.ndarray]) -> np.ndarray:
        """Trace the boundary point of text.

        The first two points of every char box are collected in the given
        order, followed by the last two points of every char box in reverse
        box order, so that the concatenated points walk around the text.

        Args:
            char_boxes (list[ndarray]): The char boxes for one text. Each
                element is 4x2 ndarray.

        Returns:
            ndarray: The boundary point sets with size nx2, cast to ``int``.
        """
        assert is_type_list(char_boxes, np.ndarray)

        # from top left to top right
        p_top = [box[0:2] for box in char_boxes]
        # from bottom right to bottom left
        p_bottom = [box[[2, 3], :] for box in reversed(char_boxes)]

        boundary = np.concatenate(p_top + p_bottom).astype(int)
        return boundary

    def _match_bbox_char_str(self, bboxes: np.ndarray, char_bboxes: np.ndarray,
                             strs: np.ndarray
                             ) -> Tuple[List[np.ndarray], List[str]]:
        """Match the bboxes, char bboxes, and strs.

        Args:
            bboxes (ndarray): The text boxes of size (2, 4, num_box).
            char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
            strs (ndarray): The string of size (num_strs,)

        Returns:
            Tuple(List[ndarray], List[str]): Polygon & word list.
        """
        assert isinstance(bboxes, np.ndarray)
        assert isinstance(char_bboxes, np.ndarray)
        assert isinstance(strs, np.ndarray)

        char_bboxes = char_bboxes.astype(np.int32)
        # A single char box comes without the trailing axis; add it so the
        # transpose below always sees a 3-d array.
        if char_bboxes.ndim == 2:
            char_bboxes = np.expand_dims(char_bboxes, axis=2)
        # Reverse the axes so that the first axis indexes the char box.
        char_bboxes = np.transpose(char_bboxes, (2, 1, 0))

        # Likewise, a 2-d ``bboxes`` is treated as exactly one text box.
        num_boxes = 1 if bboxes.ndim == 2 else bboxes.shape[-1]
        poly_charbox_list = [[] for _ in range(num_boxes)]

        # One entry of ``strs`` may hold several whitespace-separated words.
        words = []
        for line in strs:
            words.extend(line.split())

        # Char boxes are consumed in order: the i-th word takes as many
        # consecutive char boxes as it has characters.
        words_end_inx = np.cumsum([len(word) for word in words])
        start_inx = 0
        for word_inx, end_inx in enumerate(words_end_inx):
            for char_inx in range(start_inx, end_inx):
                poly_charbox_list[word_inx].append(char_bboxes[char_inx])
            start_inx = end_inx

        # Every text box must have received at least one char box.
        for box_inx in range(num_boxes):
            assert len(poly_charbox_list[box_inx]) > 0

        # The empty (0, 2) fallback is only reachable when the assertions
        # above are disabled (e.g. running with ``python -O``).
        poly_boundary_list = [
            self._trace_boundary(item) if len(item) > 0 else np.empty((0, 2))
            for item in poly_charbox_list
        ]

        return poly_boundary_list, words

    def parse_files(self, img_paths: Union[List[str], str],
                    ann_paths: Union[List[str], str]) -> List[Tuple]:
        """Convert annotations to MMOCR format.

        Args:
            img_paths (str or list[str]): The directory of the images. It is
                stored as ``self.img_dir`` and joined with every image name
                in :meth:`parse_file`, so it is used as a directory path.
            ann_paths (str or list[str]): The path of the ``.mat`` annotation
                file which contains all the annotations. Must be a ``str``.

        Returns:
            List[Tuple]: A list of a tuple of (image_path, instances).

            - img_path (str): The path of image file, which can be read
              directly by opencv.
            - instance: instance is a list of dict containing parsed
              annotations, which should contain the following keys:

              - 'poly' or 'box' (textdet or textspotting)
              - 'text' (textspotting or textrecog)
              - 'ignore' (all task)
        """
        assert isinstance(ann_paths, str)
        gt = loadmat(ann_paths)
        self.img_dir = img_paths

        # One tuple per image: (name, word boxes, char boxes, texts).
        annotations = list(
            zip(gt['imnames'][0], gt['wordBB'][0], gt['charBB'][0],
                gt['txt'][0]))
        samples = track_parallel_progress(
            self.parse_file, annotations, nproc=self.nproc)
        return samples

    def parse_file(self, annotation: Tuple) -> Tuple:
        """Parse single annotation.

        Args:
            annotation (Tuple): A tuple of (img_file, wordBB, charBB, txt)
                as assembled in :meth:`parse_files`.

        Returns:
            Tuple: (image_path, instances), where ``instances`` is a list of
            dicts with the keys 'poly' (flattened polygon coordinates),
            'text' (the word) and 'ignore' (always False).
        """
        img_file, wordBB, charBB, txt = annotation
        polys_list, word_list = self._match_bbox_char_str(wordBB, charBB, txt)

        instances = [
            dict(poly=poly.flatten().tolist(), text=word, ignore=False)
            for poly, word in zip(polys_list, word_list)
        ]

        # ``img_file`` is indexed with [0] to obtain the file name.
        return osp.join(self.img_dir, img_file[0]), instances