# unit_test/data/preprocess/cc3m/cc3m_val_download_list.py
# herrius's picture
# Upload 259 files
# 32b542e
# raw
# history blame contribute delete
# 459 Bytes
import os
# Build the download list for the validation split.
#
# Input : 'Validation_GCC-1.1.0-Validation.tsv' -- one row per line, with the
#         caption in the first tab-separated column and the image URL in the
#         second.
# Output: 'val4download.txt' -- one row per URL of the form
#         ../val_image/<8-digit index>.jpg<TAB>"<url>"
#         where the index is the zero-based position of the URL in the input.
# Also makes sure the '../val_image' target directory exists.

captions = []  # caption of each row, split on single spaces (not used below)
urls = []      # image URL of each row, in input order

with open('Validation_GCC-1.1.0-Validation.tsv') as fp:
    for line in fp:
        # Remove only the line terminator. Slicing off the last character
        # unconditionally would truncate the URL on a final line that has no
        # trailing newline.
        line = line.rstrip('\n')
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing with IndexError on the missing URL column.
            continue
        s = line.split('\t')
        captions.append(s[0].split(' '))
        urls.append(s[1])

with open('val4download.txt', 'w') as fp:
    for cnt, url in enumerate(urls):
        # The URL is wrapped in double quotes in the output row.
        fp.write("../val_image/{:08d}.jpg\t\"{}\"\n".format(cnt, url))

# Create the directory the listed image paths point into, if it is missing.
if not os.path.exists('../val_image'):
    os.makedirs('../val_image')