# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, List, Tuple
import mmcv
from mmengine import mkdir_or_exist
from mmocr.registry import DATA_PACKERS
from mmocr.utils import bbox2poly, crop_img, poly2bbox, warp_img
from .base import BasePacker
@DATA_PACKERS.register_module()
class TextRecogPacker(BasePacker):
    """Packer for text recognition annotations.

    Converts the parsed annotation info into the following format:

    .. code-block:: python

        {
            "metainfo":
                {
                    "dataset_type": "TextRecogDataset",
                    "task_name": "textrecog",
                },
            "data_list":
                [
                    {
                        "img_path": "textrecog_imgs/train/test_img.jpg",
                        "instances":
                            [
                                {
                                    "text": "GRAND"
                                }
                            ]
                    }
                ]
        }
    """

    def pack_instance(self, sample: Tuple) -> Dict:
        """Pack a single annotation into a recognition instance.

        Args:
            sample (Tuple): A tuple of (img_name, text).

        Returns:
            Dict: The packed instance.
        """
        full_path, transcription = sample
        # Store the image path relative to the dataset root so the
        # resulting annotation file is location-independent.
        rel_path = osp.relpath(full_path, self.data_root)
        return dict(img_path=rel_path, instances=[dict(text=transcription)])

    def add_meta(self, sample: List) -> Dict:
        """Wrap the packed samples with dataset-level meta information.

        Args:
            sample (List): A list of samples of the dataset.

        Returns:
            Dict: A dict containing the meta information and samples.
        """
        return dict(
            metainfo=dict(
                dataset_type='TextRecogDataset', task_name='textrecog'),
            data_list=sample)
@DATA_PACKERS.register_module()
class TextRecogCropPacker(TextRecogPacker):
    """Text recognition packer with an image cropper.

    Packs the parsed annotation info and crops the word images out of the
    full-size ones.

    Args:
        crop_with_warp (bool): Whether to crop the text from the original
            image using opencv warpPerspective.
        jitter (bool): (Applicable when crop_with_warp=True)
            Whether to jitter the box.
        jitter_ratio_x (float): (Applicable when crop_with_warp=True)
            Horizontal jitter ratio relative to the height.
        jitter_ratio_y (float): (Applicable when crop_with_warp=True)
            Vertical jitter ratio relative to the height.
        long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the long edge of the cropped image.
            Defaults to 0.1.
        short_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
            The ratio of padding the short edge of the cropped image.
            Defaults to 0.05.
    """

    def __init__(self,
                 crop_with_warp: bool = False,
                 jitter: bool = False,
                 jitter_ratio_x: float = 0.0,
                 jitter_ratio_y: float = 0.0,
                 long_edge_pad_ratio: float = 0.0,
                 short_edge_pad_ratio: float = 0.0,
                 **kwargs):
        super().__init__(**kwargs)
        self.crop_with_warp = crop_with_warp
        self.jitter = jitter
        self.jrx = jitter_ratio_x
        self.jry = jitter_ratio_y
        self.lepr = long_edge_pad_ratio
        self.sepr = short_edge_pad_ratio
        # Cropped word patches are written to
        # <data_root>/textrecog_imgs/<split>/.
        self.cropped_img_dir = 'textrecog_imgs'
        self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
        mkdir_or_exist(self.crop_save_path)
        mkdir_or_exist(osp.join(self.crop_save_path, self.split))

    def pack_instance(self, sample: Tuple) -> List:
        """Crop patches from the full image and pack them as instances.

        Args:
            sample (Tuple): A tuple of (img_name, instances).

        Returns:
            List: The list of cropped patches.
        """

        def rect_from(instance: Dict) -> List:
            # Prefer an explicit box; otherwise derive one from the polygon.
            if 'box' in instance:
                return bbox2poly(instance['box']).tolist()
            if 'poly' in instance:
                return bbox2poly(poly2bbox(instance['poly'])).tolist()

        def quad_from(instance: Dict) -> List:
            # Prefer the polygon; otherwise expand the box into one.
            if 'poly' in instance:
                return instance['poly']
            if 'box' in instance:
                return bbox2poly(instance['box']).tolist()

        img_path, instances = sample
        image = mmcv.imread(img_path)
        stem, ext = osp.splitext(osp.basename(img_path))

        packed = []
        for idx, instance in enumerate(instances):
            if instance['ignore']:
                continue
            if self.crop_with_warp:
                patch = warp_img(image, quad_from(instance), self.jitter,
                                 self.jrx, self.jry)
            else:
                patch = crop_img(image, rect_from(instance), self.lepr,
                                 self.sepr)
            # Skip degenerate crops with zero height or width.
            if patch.shape[0] == 0 or patch.shape[1] == 0:
                continue
            patch_name = f'{stem}_{idx}{ext}'
            mmcv.imwrite(
                patch, osp.join(self.crop_save_path, self.split, patch_name))
            packed.append(
                dict(
                    img_path=osp.join(self.cropped_img_dir, self.split,
                                      patch_name),
                    instances=[dict(text=instance['text'])]))
        return packed

    def add_meta(self, sample: List) -> Dict:
        """Flatten the per-image patch lists and attach meta information."""
        # pack_instance yields one list of patches per image; merge them
        # into a single flat data list before delegating to the parent.
        flattened = []
        for patches in sample:
            flattened.extend(patches)
        return super().add_meta(flattened)