Spaces:

Mountchicken
/

MAERec-Gradio

Sleeping

App Files Files Community

MAERec-Gradio / mmocr /datasets /preparers /packers /textrecog_packer.py

Mountchicken

Upload 704 files

9bf4bd7 over 1 year ago

raw

history blame

6.15 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import os.path as osp
	from typing import Dict, List, Tuple

	import mmcv
	from mmengine import mkdir_or_exist

	from mmocr.registry import DATA_PACKERS
	from mmocr.utils import bbox2poly, crop_img, poly2bbox, warp_img
	from .base import BasePacker


	@DATA_PACKERS.register_module()
	class TextRecogPacker(BasePacker):
	    """Text recognition packer.

	    It packs the parsed annotation info into the following layout:

	    .. code-block:: python

	        {
	            "metainfo":
	            {
	                "dataset_type": "TextRecogDataset",
	                "task_name": "textrecog",
	            },
	            "data_list":
	            [
	                {
	                    "img_path": "textrecog_imgs/train/test_img.jpg",
	                    "instances":
	                    [
	                        {
	                            "text": "GRAND"
	                        }
	                    ]
	                }
	            ]
	        }
	    """

	    def pack_instance(self, sample: Tuple) -> Dict:
	        """Turn one parsed sample into a recognition instance.

	        Args:
	            sample (Tuple): A tuple of (img_name, text).

	        Returns:
	            Dict: The packed instance. ``img_path`` is ``img_name``
	            expressed relative to ``self.data_root`` and ``instances`` is
	            a one-element list holding the text.
	        """
	        image_file, transcription = sample
	        # Annotations store the image location relative to the data root.
	        relative_path = osp.relpath(image_file, self.data_root)
	        return {
	            'instances': [{'text': transcription}],
	            'img_path': relative_path,
	        }

	    def add_meta(self, sample: List) -> Dict:
	        """Wrap the packed samples together with the meta information.

	        Args:
	            sample (List): A list of samples of the dataset.

	        Returns:
	            Dict: A dict holding the dataset ``metainfo`` and the samples
	            under ``data_list``.
	        """
	        metainfo = dict(
	            dataset_type='TextRecogDataset', task_name='textrecog')
	        return dict(metainfo=metainfo, data_list=sample)


	@DATA_PACKERS.register_module()
	class TextRecogCropPacker(TextRecogPacker):
	    """Text recognition packer with image cropper.

	    It is used to pack the parsed annotation info and crop out the word
	    images from the full-size ones. The crops are written to
	    ``<data_root>/textrecog_imgs/<split>/``.

	    Args:
	        crop_with_warp (bool): Whether to crop the text from the original
	            image using opencv warpPerspective. Defaults to False.
	        jitter (bool): (Applicable when crop_with_warp=True)
	            Whether to jitter the box. Defaults to False.
	        jitter_ratio_x (float): (Applicable when crop_with_warp=True)
	            Horizontal jitter ratio relative to the height.
	            Defaults to 0.0.
	        jitter_ratio_y (float): (Applicable when crop_with_warp=True)
	            Vertical jitter ratio relative to the height.
	            Defaults to 0.0.
	        long_edge_pad_ratio (float): (Applicable when crop_with_warp=False)
	            The ratio of padding the long edge of the cropped image.
	            Defaults to 0.0.
	        short_edge_pad_ratio (float): (Applicable when
	            crop_with_warp=False) The ratio of padding the short edge of
	            the cropped image. Defaults to 0.0.
	        **kwargs: Passed through unchanged to the parent constructor.
	    """

	    def __init__(self,
	                 crop_with_warp: bool = False,
	                 jitter: bool = False,
	                 jitter_ratio_x: float = 0.0,
	                 jitter_ratio_y: float = 0.0,
	                 long_edge_pad_ratio: float = 0.0,
	                 short_edge_pad_ratio: float = 0.0,
	                 **kwargs):
	        # NOTE(review): self.data_root and self.split are read below and
	        # are presumably set by the parent constructor -- confirm in
	        # BasePacker.
	        super().__init__(**kwargs)
	        self.crop_with_warp = crop_with_warp
	        self.jitter = jitter
	        self.jrx = jitter_ratio_x
	        self.jry = jitter_ratio_y
	        self.lepr = long_edge_pad_ratio
	        self.sepr = short_edge_pad_ratio
	        # Directory (relative to the data root) that receives the patches
	        # cropped out of the full-size images.
	        self.cropped_img_dir = 'textrecog_imgs'
	        self.crop_save_path = osp.join(self.data_root, self.cropped_img_dir)
	        mkdir_or_exist(self.crop_save_path)
	        mkdir_or_exist(osp.join(self.crop_save_path, self.split))

	    @staticmethod
	    def _get_box(instance: Dict) -> List:
	        """Return the axis-aligned box of ``instance`` as a polygon list.

	        ``box`` takes precedence; otherwise the bounding box of ``poly``
	        is used.

	        Raises:
	            KeyError: If the instance has neither ``box`` nor ``poly``.
	        """
	        if 'box' in instance:
	            return bbox2poly(instance['box']).tolist()
	        if 'poly' in instance:
	            return bbox2poly(poly2bbox(instance['poly'])).tolist()
	        raise KeyError(
	            "Instance must contain either a 'box' or a 'poly' entry")

	    @staticmethod
	    def _get_poly(instance: Dict) -> List:
	        """Return the polygon of ``instance``.

	        ``poly`` takes precedence; otherwise ``box`` is converted to a
	        polygon.

	        Raises:
	            KeyError: If the instance has neither ``box`` nor ``poly``.
	        """
	        if 'poly' in instance:
	            return instance['poly']
	        if 'box' in instance:
	            return bbox2poly(instance['box']).tolist()
	        raise KeyError(
	            "Instance must contain either a 'box' or a 'poly' entry")

	    def pack_instance(self, sample: Tuple) -> List:
	        """Crop the text patches out of one image and pack them.

	        Args:
	            sample (Tuple): A tuple of (img_path, instances). Every
	                instance is a dict with a ``text`` entry and a ``box``
	                and/or ``poly`` entry. Instances whose ``ignore`` entry
	                is truthy are skipped; a missing ``ignore`` entry is
	                treated as not ignored.

	        Returns:
	            List: One recognition instance per patch written to disk.

	        Raises:
	            KeyError: If a non-ignored instance has neither ``box`` nor
	                ``poly``.
	        """
	        img_path, instances = sample
	        img = mmcv.imread(img_path)

	        # Loop-invariant pieces of the patch file name and location.
	        # NOTE: only the basename is used, so source images that share a
	        # basename within one split map to the same patch names.
	        stem, ext = osp.splitext(osp.basename(img_path))
	        patch_dir = osp.join(self.crop_save_path, self.split)

	        data_list = []
	        for i, instance in enumerate(instances):
	            if instance.get('ignore', False):
	                continue
	            if self.crop_with_warp:
	                patch = warp_img(img, self._get_poly(instance),
	                                 self.jitter, self.jrx, self.jry)
	            else:
	                patch = crop_img(img, self._get_box(instance), self.lepr,
	                                 self.sepr)
	            # Skip degenerate crops that have no pixels.
	            if patch.shape[0] == 0 or patch.shape[1] == 0:
	                continue
	            text = instance['text']
	            # The index of the instance within the image keeps the patch
	            # names of one image distinct.
	            patch_name = f'{stem}_{i}{ext}'
	            mmcv.imwrite(patch, osp.join(patch_dir, patch_name))
	            data_list.append(
	                dict(
	                    instances=[dict(text=text)],
	                    img_path=osp.join(self.cropped_img_dir, self.split,
	                                      patch_name)))

	        return data_list

	    def add_meta(self, sample: List) -> Dict:
	        """Flatten the per-image patch lists and add meta information.

	        Args:
	            sample (List): A list with one list of packed patches per
	                image, as returned by :meth:`pack_instance`.

	        Returns:
	            Dict: A dict contains the meta information and the flattened
	            samples.
	        """
	        # pack_instance returns a list of patches for every image, so the
	        # nested list has to be flattened before adding the meta info.
	        flattened = [item for sublist in sample for item in sublist]
	        return super().add_meta(flattened)