File size: 454 Bytes
32b542e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import os

# Parallel lists filled from the TSV: captions[i] is the caption of row i
# split on single spaces, and urls[i] is the URL taken from the same row.
captions = []
urls = []

# Each row is expected to look like "<caption>\t<url>\n".
# NOTE(review): the file is decoded with the platform default encoding --
# confirm whether an explicit encoding should be passed to open().
with open('Train_GCC-training.tsv') as fp:
    for line in fp:
        # Remove only the line terminator. Unconditionally slicing the last
        # character off the URL field truncates the URL when the final line
        # has no trailing newline or when the row carries extra columns.
        record = line.rstrip('\n')
        if not record:
            # A blank line (e.g. a stray empty line at the end of the file)
            # carries no caption/URL pair, so there is nothing to record.
            continue
        s = record.split('\t')
        captions.append(s[0].split(' '))
        urls.append(s[1])
        
# Emit one download entry per URL: the destination image path (named after
# the URL's zero-padded position in `urls`), a tab, then the URL wrapped in
# double quotes.
entry_template = "../train_image/{:08d}.jpg\t\"{}\"\n"
with open('train4download.txt', 'w') as listing:
    listing.writelines(
        entry_template.format(index, address)
        for index, address in enumerate(urls)
    )

# Make sure the image output directory exists. exist_ok=True lets makedirs
# tolerate an already-present directory by itself, so there is no window
# between a separate existence check and the creation call.
os.makedirs('../train_image', exist_ok=True)