File size: 806 Bytes
bfea304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python3

import json

def _select_label(annotations):
    """Return the transcription to export for one image, or None to skip it.

    Args:
        annotations: The list of annotation dicts stored for one image in
            the labels JSON. Each dict is read for its 'language',
            'illegibility' and 'transcription' keys.

    Returns:
        The whitespace-stripped transcription of the first annotation, or
        None when the image should be left out of the ground-truth file.
    """
    if len(annotations) != 1:
        # Exactly one annotation per image is expected; report anything else.
        print('error', annotations)
        if not annotations:
            # Nothing to index: skip the image instead of raising IndexError
            # and aborting the whole conversion half-way through the output.
            return None
    entry = annotations[0]

    # Only Latin-script samples are exported.
    if entry['language'].lower() != 'latin':
        return None
    # Samples flagged as illegible are not usable as ground truth.
    if entry['illegibility']:
        return None

    label = entry['transcription'].strip()
    if not label:
        # Blank transcription after stripping whitespace.
        return None
    # Labels containing '#' are treated as corrupted, with 'LocaL#3' as the
    # single explicitly allowed exception.
    if '#' in label and label != 'LocaL#3':
        return None
    return label


def main(labels_path='train_task2_labels.json', output_path='gt.txt',
         image_dir='train_task2_images/'):
    """Convert the labels JSON into a tab-separated ground-truth file.

    Writes one line per kept image: '<image_dir><key>.jpg', a tab, then the
    transcription. Images rejected by the filtering rules are omitted.

    Args:
        labels_path: Path of the JSON file mapping image keys to annotation
            lists.
        output_path: Path of the text file to (over)write.
        image_dir: Prefix prepended to every image file name.
    """
    with open(labels_path, 'r', encoding='utf8') as src:
        labels = json.load(src)

    with open(output_path, 'w', encoding='utf8') as out:
        for image_id, annotations in labels.items():
            label = _select_label(annotations)
            if label is None:
                continue
            out.write('\t'.join([image_dir + image_id + '.jpg', label]) + '\n')


if __name__ == '__main__':
    main()