import os | |
# Build the image download list from the tab-separated index file.
#
# Each line of cc12m.tsv is split on tabs; field 0 is stored as a list of
# space-separated caption tokens and field 1 is stored as the image URL.
# NOTE(review): this assumes column 0 is the caption and column 1 the URL --
# confirm against the actual column order of the .tsv being used.
captions = []
urls = []
# Explicit UTF-8 so decoding does not depend on the platform's locale.
with open('cc12m.tsv', encoding='utf-8') as fp:
    for line in fp:
        fields = line.split('\t')
        captions.append(fields[0].split(' '))
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the URL when the final line has no
        # trailing newline or when the line carries extra tab-separated fields.
        urls.append(fields[1].rstrip('\n'))

# One output line per URL: the target image path (zero-padded running index)
# followed by a tab and the URL wrapped in double quotes.
with open('train4download.txt', 'w', encoding='utf-8') as fp:
    for cnt, url in enumerate(urls):
        fp.write("../train_image/{:08d}.jpg\t\"{}\"\n".format(cnt, url))

# Make sure the directory referenced by the paths above exists; exist_ok
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs('../train_image', exist_ok=True)