ChatGPT-ImageCaptioner / tools /create_imagenetlvis_json.py
taesiri's picture
Duplicate from taesiri/DeticChatGPT
f97cf44
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import json
import os
import cv2
from nltk.corpus import wordnet
if __name__ == '__main__':
    # Build an image-info JSON for an ImageNet subset laid out as one folder
    # per class. Each image becomes an entry in 'images' carrying its size
    # and the single LVIS category id of its folder; the LVIS 'categories'
    # list is copied through with an added per-category 'image_count'.
    parser = argparse.ArgumentParser()
    parser.add_argument('--imagenet_path', default='datasets/imagenet/ImageNet-LVIS')
    parser.add_argument('--lvis_meta_path', default='datasets/lvis/lvis_v1_val.json')
    parser.add_argument('--out_path', default='datasets/imagenet/annotations/imagenet_lvis_image_info.json')
    args = parser.parse_args()

    print('Loading LVIS meta')
    with open(args.lvis_meta_path, 'r') as f:
        data = json.load(f)
    print('Done')

    # Look up an LVIS category record by its WordNet synset name.
    synset2cat = {x['synset']: x for x in data['categories']}
    count = 0
    images = []
    image_counts = {}
    folders = sorted(os.listdir(args.imagenet_path))
    for i, folder in enumerate(folders):
        # os.path.join supplies the separator; plain string concatenation
        # only worked when --imagenet_path happened to end with a slash.
        class_path = os.path.join(args.imagenet_path, folder)
        files = sorted(os.listdir(class_path))
        # The folder name is parsed as one leading character followed by a
        # numeric WordNet noun offset (presumably ImageNet wnids such as
        # 'n01443537' -- confirm against the dataset layout).
        synset = wordnet.synset_from_pos_and_offset('n', int(folder[1:])).name()
        cat = synset2cat[synset]
        cat_id = cat['id']
        cat_name = cat['name']
        cat_images = []
        for file in files:
            # 'file_name' is stored in the JSON relative to --imagenet_path,
            # always with a forward slash.
            file_name = '{}/{}'.format(folder, file)
            img = cv2.imread(os.path.join(class_path, file))
            if img is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None rather than raising; skip it (without
                # consuming an image id) instead of crashing on img.shape.
                print('Warning: could not read image, skipping:', file_name)
                continue
            count = count + 1
            h, w = img.shape[:2]
            image = {
                'id': count,
                'file_name': file_name,
                'pos_category_ids': [cat_id],
                'width': w,
                'height': h
            }
            cat_images.append(image)
        images.extend(cat_images)
        image_counts[cat_id] = len(cat_images)
        print(i, cat_name, len(cat_images))
    print('# Images', len(images))

    # Categories with no matching folder get an explicit count of 0.
    for x in data['categories']:
        x['image_count'] = image_counts.get(x['id'], 0)
    out = {'categories': data['categories'], 'images': images, 'annotations': []}

    print('Writing to', args.out_path)
    # Make sure the destination directory exists before writing.
    out_dir = os.path.dirname(args.out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(args.out_path, 'w') as f:
        json.dump(out, f)