Spaces:

Mountchicken
/

MAERec-Gradio

Sleeping

App Files Files Community

MAERec-Gradio / tools /dataset_converters /textrecog /imgur_converter.py

Mountchicken

Upload 704 files

9bf4bd7 over 1 year ago

raw

history blame

5.39 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import argparse
	import math
	import os
	import os.path as osp

	import mmcv
	import mmengine
	import numpy as np

	from mmocr.utils import crop_img, dump_ocr_data


	def parse_args():
	    """Parse the command-line arguments of the IMGUR converter.

	    Returns:
	        argparse.Namespace: Parsed arguments. ``root_path`` holds the
	        single positional argument, the root directory of IMGUR.
	    """
	    arg_parser = argparse.ArgumentParser(
	        description='Generate training, validation and test set of IMGUR ')
	    arg_parser.add_argument('root_path', help='Root dir path of IMGUR')
	    return arg_parser.parse_args()


	def collect_imgur_info(root_path, annotation_filename, print_every=1000):
	    """Collect image sizes and word annotations from an annotation file.

	    Images that are missing on disk or that cannot be decoded are skipped.

	    Args:
	        root_path (str): Root directory. The annotation file is read from
	            ``<root_path>/annotations`` and images from
	            ``<root_path>/imgs``.
	        annotation_filename (str): File name of the annotation file.
	        print_every (int): Print progress every ``print_every`` images.
	            A falsy value (``0`` or ``None``) disables progress output.

	    Returns:
	        list[dict]: One dict per readable image with the keys
	        ``file_name``, ``height``, ``width`` and ``anno_info``.
	        ``anno_info`` is a list of dicts with the keys ``bbox`` (the
	        output of ``convert_oriented_box``) and ``word``.

	    Raises:
	        FileNotFoundError: If the annotation file does not exist.
	    """
	    annotation_path = osp.join(root_path, 'annotations', annotation_filename)
	    if not osp.exists(annotation_path):
	        # FileNotFoundError is a subclass of Exception, so callers that
	        # catch the generic Exception keep working.
	        raise FileNotFoundError(
	            f'{annotation_path} not exists, please check and try again.')

	    annotation = mmengine.load(annotation_path)
	    index_to_ann_map = annotation['index_to_ann_map']
	    ann_by_id = annotation['ann_id']
	    total = len(index_to_ann_map)

	    img_infos = []
	    for i, (img_name, ann_ids) in enumerate(index_to_ann_map.items()):
	        # Guarding on print_every avoids a ZeroDivisionError for 0.
	        if print_every and i % print_every == 0:
	            print(f'{i}/{total}')

	        file_name = img_name + '.jpg'
	        img_path = osp.join(root_path, 'imgs', file_name)

	        # Skip not exist images
	        if not osp.exists(img_path):
	            continue

	        img = mmcv.imread(img_path, 'unchanged')

	        # Skip broken images
	        if img is None:
	            continue

	        anno_info = []
	        for ann_id in ann_ids:
	            ann = ann_by_id[ann_id]

	            # The original annotation is oriented rects [x, y, w, h, a]
	            # stored as a string.
	            # NOTE(review): the slice drops the first character and the
	            # last two characters of that string; confirm this matches
	            # the serialized format of the annotation file.
	            box = np.fromstring(
	                ann['bounding_box'][1:-2], sep=',', dtype=float)
	            anno_info.append(
	                dict(bbox=convert_oriented_box(box), word=ann['word']))

	        img_infos.append(
	            dict(
	                file_name=file_name,
	                height=img.shape[0],
	                width=img.shape[1],
	                anno_info=anno_info))

	    return img_infos


	def convert_oriented_box(box):
	    """Convert an oriented rect ``[x, y, w, h, a]`` to a 4-point polygon.

	    Args:
	        box (Sequence[float]): Center x, center y, width, height and
	            angle. The angle is negated and scaled by ``pi / 180`` before
	            use. Only the first five entries are read.

	    Returns:
	        list[float]: Eight values ``[x0, y0, x1, y1, x2, y2, x3, y3]`` in
	        the point order chosen by ``get_best_begin_point_single``.
	    """
	    center_x, center_y, box_w, box_h, degrees = box[:5]
	    theta = -degrees * math.pi / 180

	    half_w, half_h = box_w / 2, box_h / 2
	    # Corners of the unrotated rect centred on the origin: the first row
	    # holds the x offsets, the second row the y offsets.
	    corners = np.array([[-half_w, half_w, half_w, -half_w],
	                        [-half_h, -half_h, half_h, half_h]])
	    cos_t, sin_t = np.cos(theta), np.sin(theta)
	    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
	    rotated = rotation.dot(corners)

	    # Shift back to the box center and interleave as x0, y0, x1, y1, ...
	    flat = np.empty(8, dtype=np.float32)
	    flat[0::2] = rotated[0, :4] + center_x
	    flat[1::2] = rotated[1, :4] + center_y

	    return get_best_begin_point_single(flat).tolist()


	def get_best_begin_point_single(coordinate):
	    """Pick the cyclic point order that best matches the bounding box.

	    The four cyclic orderings of the input points are compared against
	    the corners of the axis-aligned bounding box, taken in the order
	    (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax). The ordering
	    with the smallest summed point-to-corner distance is returned; on a
	    tie the earliest ordering wins.

	    Args:
	        coordinate (Sequence[float]): Eight values
	            ``[x1, y1, x2, y2, x3, y3, x4, y4]``.

	    Returns:
	        numpy.ndarray: Array of shape (8,) holding the reordered points.
	    """
	    x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
	    points = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]

	    xmin = min(x1, x2, x3, x4)
	    ymin = min(y1, y2, y3, y4)
	    xmax = max(x1, x2, x3, x4)
	    ymax = max(y1, y2, y3, y4)
	    targets = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]

	    best_shift = 0
	    # Start from infinity instead of a large finite constant so that the
	    # best ordering is found no matter how large the coordinates are.
	    best_cost = math.inf
	    for shift in range(4):
	        candidate = points[shift:] + points[:shift]
	        cost = sum(
	            cal_line_length(point, target)
	            for point, target in zip(candidate, targets))
	        # Strict comparison keeps the earliest ordering on ties.
	        if cost < best_cost:
	            best_cost = cost
	            best_shift = shift

	    best = points[best_shift:] + points[:best_shift]
	    return np.array(best).reshape(8)


	def cal_line_length(point1, point2):
	    """Return the Euclidean distance between two 2D points.

	    Args:
	        point1 (Sequence[float]): First point; indices 0 and 1 are read.
	        point2 (Sequence[float]): Second point; indices 0 and 1 are read.

	    Returns:
	        float: Distance between the two points.
	    """
	    delta_x = point1[0] - point2[0]
	    delta_y = point1[1] - point2[1]
	    squared_sum = math.pow(delta_x, 2) + math.pow(delta_y, 2)
	    return math.sqrt(squared_sum)


	def generate_ann(root_path, split, image_infos):
	    """Crop every annotated word and write the recognition label file.

	    Crops are written to ``<root_path>/crops/<split>`` as
	    ``<image stem>_<index>.png``, with ``index`` starting at 1 for every
	    source image. The collected labels are passed to ``dump_ocr_data``
	    with ``<root_path>/<split>_label.json`` as the target file.

	    Args:
	        root_path (str): Root directory; source images are read from
	            ``<root_path>/imgs``.
	        split (str): Split name used in the output directory and label
	            file name.
	        image_infos (list[dict]): Entries with the keys ``file_name`` and
	            ``anno_info``; each annotation has the keys ``bbox`` and
	            ``word``.
	    """
	    dst_image_root = osp.join(root_path, 'crops', split)
	    dst_label_file = osp.join(root_path, f'{split}_label.json')
	    os.makedirs(dst_image_root, exist_ok=True)

	    img_info = []
	    for image_info in image_infos:
	        file_name = image_info['file_name']
	        image = mmcv.imread(osp.join(root_path, 'imgs', file_name))
	        # splitext strips only the extension, so a stem that contains
	        # dots is kept whole and crop names of different images cannot
	        # collide.
	        src_img_root = osp.splitext(file_name)[0]

	        index = 1
	        for anno in image_info['anno_info']:
	            # NOTE(review): the two trailing zeros are passed
	            # positionally to crop_img; presumably padding ratios,
	            # confirm against mmocr.utils.crop_img.
	            dst_img = crop_img(image, anno['bbox'], 0, 0)

	            # Skip invalid annotations
	            if min(dst_img.shape) == 0:
	                continue

	            dst_img_name = f'{src_img_root}_{index}.png'
	            index += 1
	            mmcv.imwrite(dst_img, osp.join(dst_image_root, dst_img_name))

	            img_info.append({
	                'file_name': dst_img_name,
	                'anno_info': [{
	                    'text': anno['word']
	                }]
	            })

	    dump_ocr_data(img_info, dst_label_file, 'textrecog')


	def main():
	    """Convert the train, val and test splits found under the root path."""
	    root_path = parse_args().root_path

	    for split in ['train', 'val', 'test']:
	        print(f'Processing {split} set...')
	        annotation_filename = f'imgur5k_annotations_{split}.json'
	        # The timer covers both collecting the annotations and writing
	        # the crops of the split.
	        timer = mmengine.Timer(
	            print_tmpl='It takes {}s to convert IMGUR annotation')
	        with timer:
	            anno_infos = collect_imgur_info(root_path, annotation_filename)
	            generate_ann(root_path, split, anno_infos)


	# Run the conversion only when this file is executed as a script.
	if __name__ == '__main__':
	main()