File size: 1,038 Bytes
59b7eeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
import hydra
import librosa
import utils
from os.path import expanduser, exists, basename, join
from utils import read_filelist, write_filelist, find_all_files
from tqdm import tqdm

def _strip_root(path, root):
    """Return *path* with a leading *root* prefix and leading slashes removed.

    Only a prefix match is stripped; occurrences of *root* later in the
    path are left alone (``str.replace`` would delete those as well and
    corrupt the relative path).
    """
    if path.startswith(root):
        path = path[len(root):]
    return path.lstrip('/')


@hydra.main(version_base=None, config_path='config', config_name='default')
def preprocess(cfg):
    """Collect the LibriSpeech test-set flac files and write them to a filelist.

    Reads from ``cfg``:
      * ``cfg.preprocess.datasets.LibriSpeech.root_val`` -- dataset root
        (``~`` is expanded).
      * ``cfg.preprocess.datasets.LibriSpeech.testsets`` -- iterable of
        subset directory names located under the root.
      * ``cfg.preprocess.view.test_filelist`` -- output filelist path.

    Every entry returned by ``find_all_files`` has its element at index 1
    rewritten in place so that the path is relative to the dataset root.
    """
    os.makedirs('filelists', exist_ok=True)

    root = expanduser(cfg.preprocess.datasets.LibriSpeech.root_val)
    print(f'Root: {root}')

    testfiles = []
    for subset in cfg.preprocess.datasets.LibriSpeech.testsets:
        files = find_all_files(join(root, subset), '.flac')
        print(f'Found {len(files)} flac files in {subset}')
        # NOTE(review): assumes each entry is a mutable sequence whose
        # index 1 is the file path string -- confirm against
        # utils.find_all_files.
        for entry in files:
            entry[1] = _strip_root(entry[1], root)
        testfiles.extend(files)

    output_path = cfg.preprocess.view.test_filelist
    print(f'Write test filelist to {output_path}')
    utils.write_filelist(testfiles, output_path)

# Script entry point: invoke the Hydra-decorated preprocess() only when this
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    preprocess()