Spaces:
Runtime error
Runtime error
File size: 2,692 Bytes
2366e36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
import os.path as osp
import cv2
from mmocr.utils import list_from_file, list_to_file
def parse_old_label(data_root, in_path, img_size=False):
    """Parse the old per-image annotation files into in-memory dicts.

    Every line of ``in_path`` is split on whitespace; the first field is the
    image path and the second the annotation file path, both joined onto
    ``data_root``. In an annotation file the first line is the text of the
    image and each following line holds whitespace-separated floats for one
    character box; the box on line ``t`` is paired with character ``t - 1``
    of the text.

    A sample is skipped (and consumes no id) when:

    * its mapping line has fewer than two fields,
    * the image or the annotation file does not exist,
    * ``img_size`` is set and OpenCV cannot decode the image,
    * the annotation file yields no lines at all.

    Args:
        data_root (str): Directory that image and annotation paths are
            relative to.
        in_path (str): Mapping file with ``image_name ann_file`` per line.
        img_size (bool): Whether to load each image and record its
            ``height`` and ``width``.

    Returns:
        tuple(dict, dict): ``imgid2imgname`` maps a running integer id to a
        dict with ``file_name``, ``text`` and, if requested, ``height`` and
        ``width``. ``imgid2anno`` maps the same id to a list of
        ``dict(char_box=..., char_text=...)``.

    Raises:
        IndexError: If an annotation file has more box lines than the text
            has characters.
        ValueError: If a box line contains a token that is not a float.
    """
    imgid2imgname = {}
    imgid2anno = {}
    idx = 0
    for line in list_from_file(in_path):
        fields = line.strip().split()
        # Blank or truncated mapping lines carry no usable pair; skip them
        # rather than failing with IndexError on fields[0] / fields[1].
        if len(fields) < 2:
            continue
        img_rel_path, ann_rel_path = fields[0], fields[1]

        img_full_path = osp.join(data_root, img_rel_path)
        if not osp.exists(img_full_path):
            continue
        ann_file = osp.join(data_root, ann_rel_path)
        if not osp.exists(ann_file):
            continue

        img_info = {'file_name': img_rel_path}
        if img_size:
            img = cv2.imread(img_full_path)
            # cv2.imread reports an undecodable file by returning None
            # instead of raising, so guard before touching ``shape``.
            if img is None:
                continue
            height, width = img.shape[:2]
            img_info['height'] = height
            img_info['width'] = width

        text = None
        char_annos = []
        for t, ann_line in enumerate(list_from_file(ann_file)):
            ann_line = ann_line.strip()
            if t == 0:
                text = ann_line
                continue
            char_box = [float(x) for x in ann_line.split()]
            char_annos.append(dict(char_box=char_box, char_text=text[t - 1]))

        # An empty annotation file provides no text label; registering the
        # sample without a 'text' key would break consumers that read it.
        if text is None:
            continue
        img_info['text'] = text

        # Register the sample only once it has been parsed completely.
        imgid2imgname[idx] = img_info
        imgid2anno[idx] = char_annos
        idx += 1

    return imgid2imgname, imgid2anno
def gen_line_dict_file(out_path, imgid2imgname, imgid2anno, img_size=False):
    """Write one JSON object per image to ``out_path``.

    Args:
        out_path (str): Destination file; receives one JSON string per line
            via ``list_to_file``.
        imgid2imgname (dict): Image id -> info dict holding ``file_name``,
            ``text`` and, when ``img_size`` is set, ``height``/``width``.
        imgid2anno (dict): Image id -> annotation list. Images whose id is
            absent from this mapping are not written.
        img_size (bool): Whether to copy ``height`` and ``width`` into each
            output record.
    """
    # Size fields to copy, in output order; empty when sizes are not wanted.
    size_keys = ('height', 'width') if img_size else ()

    records = []
    for img_id, info in imgid2imgname.items():
        if img_id not in imgid2anno:
            continue
        record = dict(file_name=info['file_name'], text=info['text'])
        for size_key in size_keys:
            record[size_key] = info[size_key]
        record['annotations'] = imgid2anno[img_id]
        records.append(json.dumps(record))

    list_to_file(out_path, records)
def parse_args():
    """Parse the command-line options of the converter.

    All three options are mandatory. They are marked ``required`` so that a
    missing one is reported by argparse as a usage error instead of reaching
    the conversion code as ``None``.

    Returns:
        argparse.Namespace: Namespace with ``data_root``, ``in_path`` and
        ``out_path``.

    Raises:
        SystemExit: Raised by argparse when an option is missing or unknown.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data-root',
        required=True,
        help='data root for both image file and anno file')
    parser.add_argument(
        '--in-path',
        required=True,
        help='mapping file of image_name and ann_file,'
        ' "image_name ann_file" in each line')
    parser.add_argument(
        '--out-path',
        required=True,
        help='output txt path with line-json format')
    return parser.parse_args()
def main():
    """Run the conversion using the command-line options.

    Parses the old labels found via ``--data-root`` and ``--in-path``, then
    writes them to ``--out-path``. Both steps are called without
    ``img_size``, so it stays at its default of ``False``.
    """
    cli = parse_args()
    img_infos, char_annos = parse_old_label(cli.data_root, cli.in_path)
    gen_line_dict_file(cli.out_path, img_infos, char_annos)
    print('finish')
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|