# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import math
import os.path as osp

import mmcv
import mmengine
import numpy as np

from mmocr.utils import dump_ocr_data
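

# Expected layout under ``root_path`` (inferred from the paths used below;
# adjust if your local copy of IMGUR5K is organized differently):
#   root_path/annotations/imgur5k_annotations_{train,val,test}.json
#   root_path/imgs/<image_name>.jpg
# Example invocation (script name is illustrative):
#   python imgur_converter.py /path/to/IMGUR5K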


def parse_args():
    parser = argparse.ArgumentParser(
        description='Generate training, validation and test sets of IMGUR')
    parser.add_argument('root_path', help='Root dir path of IMGUR')
    args = parser.parse_args()

    return args
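

# Walk one IMGUR5K annotation file and build a list of per-image dicts
# (file name, height, width and COCO-style instance annotations) that
# ``dump_ocr_data`` can serialize. Images that are missing on disk or
# cannot be decoded are skipped.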
def collect_imgur_info(root_path, annotation_filename, print_every=1000):
    annotation_path = osp.join(root_path, 'annotations', annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} does not exist, please check and try again.')

    annotation = mmengine.load(annotation_path)
    images = annotation['index_to_ann_map'].keys()
    img_infos = []
    for i, img_name in enumerate(images):
        if i % print_every == 0:
            print(f'{i}/{len(images)}')

        img_path = osp.join(root_path, 'imgs', img_name + '.jpg')

        # Skip images that do not exist on disk
        if not osp.exists(img_path):
            continue

        img = mmcv.imread(img_path, 'unchanged')

        # Skip broken images
        if img is None:
            continue

        img_info = dict(
            file_name=img_name + '.jpg',
            height=img.shape[0],
            width=img.shape[1])

        anno_info = []
        for ann_id in annotation['index_to_ann_map'][img_name]:
            ann = annotation['ann_id'][ann_id]

            # The original annotation is an oriented rect [x, y, w, h, a]
            box = np.fromstring(
                ann['bounding_box'][1:-2], sep=',', dtype=float)
            quadrilateral = convert_oriented_box(box)

            xs, ys = quadrilateral[::2], quadrilateral[1::2]
            x = max(0, math.floor(min(xs)))
            y = max(0, math.floor(min(ys)))
            w = math.floor(max(xs)) - x
            h = math.floor(max(ys)) - y
            bbox = [x, y, w, h]
            segmentation = quadrilateral

            anno = dict(
                iscrowd=0,
                category_id=1,
                bbox=bbox,
                area=w * h,
                segmentation=[segmentation])
            anno_info.append(anno)

        img_info.update(anno_info=anno_info)
        img_infos.append(img_info)

    return img_infos
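

# Convert an oriented rectangle ``[x_ctr, y_ctr, w, h, angle]`` (angle in
# degrees, as stored in the IMGUR5K annotations) into an 8-value polygon
# ``[x0, y0, ..., x3, y3]`` by rotating the axis-aligned corners around the
# box center.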
def convert_oriented_box(box):
    x_ctr, y_ctr, width, height, angle = box[:5]
    angle = -angle * math.pi / 180

    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
    R = np.array([[np.cos(angle), -np.sin(angle)],
                  [np.sin(angle), np.cos(angle)]])
    poly = R.dot(rect)
    x0, x1, x2, x3 = poly[0, :4] + x_ctr
    y0, y1, y2, y3 = poly[1, :4] + y_ctr
    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
    poly = get_best_begin_point_single(poly)

    return poly.tolist()
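

# Re-order the four polygon vertices so that the sequence starts near the
# top-left: among the four cyclic rotations of the vertex list, pick the one
# whose summed distance to the corners of the polygon's axis-aligned bounding
# box is smallest, keeping the original winding order.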
def get_best_begin_point_single(coordinate):
    x1, y1, x2, y2, x3, y3, x4, y4 = coordinate
    xmin = min(x1, x2, x3, x4)
    ymin = min(y1, y2, y3, y4)
    xmax = max(x1, x2, x3, x4)
    ymax = max(y1, y2, y3, y4)
    combine = [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
               [[x2, y2], [x3, y3], [x4, y4], [x1, y1]],
               [[x3, y3], [x4, y4], [x1, y1], [x2, y2]],
               [[x4, y4], [x1, y1], [x2, y2], [x3, y3]]]
    dst_coordinate = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]
    force = 100000000.0
    force_flag = 0
    for i in range(4):
        temp_force = cal_line_length(combine[i][0], dst_coordinate[0]) \
            + cal_line_length(combine[i][1], dst_coordinate[1]) \
            + cal_line_length(combine[i][2], dst_coordinate[2]) \
            + cal_line_length(combine[i][3], dst_coordinate[3])
        if temp_force < force:
            force = temp_force
            force_flag = i

    return np.array(combine[force_flag]).reshape(8)


def cal_line_length(point1, point2):
    """Euclidean distance between two 2-D points."""
    return math.sqrt(
        math.pow(point1[0] - point2[0], 2) +
        math.pow(point1[1] - point2[1], 2))
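

# Convert each of the train/val/test annotation files shipped with IMGUR5K
# into MMOCR's text-detection format and write the result as
# ``instances_{split}.json`` under the same root directory.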
def main():
    args = parse_args()
    root_path = args.root_path

    for split in ['train', 'val', 'test']:
        print(f'Processing {split} set...')
        with mmengine.Timer(
                print_tmpl='It takes {}s to convert IMGUR annotation'):
            anno_infos = collect_imgur_info(
                root_path, f'imgur5k_annotations_{split}.json')
            dump_ocr_data(anno_infos,
                          osp.join(root_path, f'instances_{split}.json'),
                          'textdet')


if __name__ == '__main__':
    main()