unit_test / data /preprocess /sbu /make_sbu_json.py
herrius's picture
Upload 259 files
32b542e
raw
history blame contribute delete
665 Bytes
import json
import os
import sys
from glob import glob
# Build annotations/subcaption.json: a JSON object mapping the name of every
# file found under images/ to the caption listed for it in the SBU caption
# file. Prints the number of entries written.

# The URL file and the caption file are consumed in lockstep by zip() below:
# line i of one is paired with line i of the other. Context managers make sure
# both handles are closed as soon as the lines are in memory.
with open('dataset/SBU_captioned_photo_dataset_urls.txt', 'r') as url_file:
    imagefile = url_file.readlines()
with open('dataset/SBU_captioned_photo_dataset_captions.txt', 'r') as cap_file:
    captionfile = cap_file.readlines()

# os.path.basename honours the platform's path separator; splitting on '/'
# would leave the "images\" prefix attached on Windows and break every lookup.
valid_list = [os.path.basename(path) for path in glob("images/*")]

# Key each caption by the last path component of its image URL (URLs always
# use '/', so a plain split is correct here).
name2cap = {}
for imageurl, caption in zip(imagefile, captionfile):
    filename = imageurl.strip().split('/')[-1]
    name2cap[filename] = caption.strip()

# Keep only the images that are actually present on disk. A file in images/
# without a matching URL entry is skipped and reported, rather than aborting
# the whole run with a bare KeyError before anything is written.
data_list = {}
missing = []
for valid_img in valid_list:
    if valid_img in name2cap:
        data_list[valid_img] = name2cap[valid_img]
    else:
        missing.append(valid_img)

if missing:
    print(
        "warning: skipped {} file(s) in images/ with no caption entry "
        "(first: {})".format(len(missing), missing[0]),
        file=sys.stderr,
    )

# Closing the handle explicitly guarantees the JSON is flushed to disk.
with open('annotations/subcaption.json', 'w') as fp:
    json.dump(data_list, fp)

print(len(data_list))