"""Build a filename -> caption JSON index for locally available SBU images.

The script pairs each line of the URL list with the same-numbered line of the
caption list, keys the caption by the last path component of the URL, and then
keeps only the entries whose file name is present under ``images/``. The
result is written to ``annotations/subcaption.json`` and the number of kept
entries is printed to stdout.
"""

import json
import os
import sys
from glob import glob

URL_FILE = 'dataset/SBU_captioned_photo_dataset_urls.txt'
CAPTION_FILE = 'dataset/SBU_captioned_photo_dataset_captions.txt'
IMAGE_GLOB = 'images/*'
OUTPUT_FILE = 'annotations/subcaption.json'


def read_lines(path):
    """Return all lines of the text file at *path*, closing it afterwards."""
    with open(path, 'r') as handle:
        return handle.readlines()


def build_name2cap(url_lines, caption_lines):
    """Map the file name at the end of each URL to its stripped caption.

    Lines are paired positionally. If the same file name occurs more than
    once, the caption from the last occurrence wins.
    """
    name2cap = {}
    for imageurl, caption in zip(url_lines, caption_lines):
        filename = imageurl.strip().split('/')[-1]
        name2cap[filename] = caption.strip()
    return name2cap


def select_captions(image_names, name2cap):
    """Pick the captions for *image_names*.

    Returns a ``(captions, missing)`` pair: ``captions`` maps each known name
    to its caption, ``missing`` lists the names that have no caption entry.
    """
    captions = {}
    missing = []
    for name in image_names:
        if name in name2cap:
            captions[name] = name2cap[name]
        else:
            missing.append(name)
    return captions, missing


def main():
    """Read the dataset lists, filter by local images, and write the JSON."""
    url_lines = read_lines(URL_FILE)
    caption_lines = read_lines(CAPTION_FILE)
    if len(url_lines) != len(caption_lines):
        # zip() stops at the shorter list; a length mismatch means the
        # positional pairing may be off, so make it visible.
        print(
            'warning: %d URL lines but %d caption lines; '
            'extra lines are ignored' % (len(url_lines), len(caption_lines)),
            file=sys.stderr,
        )
    name2cap = build_name2cap(url_lines, caption_lines)

    # basename() handles whatever separator glob() returns on this platform.
    valid_list = [os.path.basename(path) for path in glob(IMAGE_GLOB)]

    data_list, missing = select_captions(valid_list, name2cap)
    if missing:
        # Entries under images/ without a caption are skipped rather than
        # aborting the whole run with a KeyError.
        print(
            'warning: skipped %d file(s) with no caption entry' % len(missing),
            file=sys.stderr,
        )

    out_dir = os.path.dirname(OUTPUT_FILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(OUTPUT_FILE, 'w') as fp:
        json.dump(data_list, fp)

    print(len(data_list))


if __name__ == '__main__':
    main()