File size: 1,038 Bytes
59b7eeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import os
import hydra
import librosa
import utils
from os.path import expanduser, exists, basename, join
from utils import read_filelist, write_filelist, find_all_files
from tqdm import tqdm
def _strip_root(path, root):
    """Return *path* with the leading *root* prefix and leading slashes removed.

    Only a leading occurrence of *root* is stripped, so a path that merely
    contains the same text further along is left intact. A path that does
    not start with *root* is returned unchanged apart from losing any
    leading '/' characters.
    """
    if root and path.startswith(root):
        path = path[len(root):]
    return path.lstrip('/')


@hydra.main(version_base=None, config_path='config', config_name='default')
def preprocess(cfg):
    """Collect the LibriSpeech test-set .flac files and write the test filelist.

    Reads from ``cfg``:
        cfg.preprocess.datasets.LibriSpeech.root_val: dataset root directory
            ('~' is expanded).
        cfg.preprocess.datasets.LibriSpeech.testsets: iterable of subset
            directory names located under the root.
        cfg.preprocess.view.test_filelist: path of the filelist to write.

    Each entry returned by ``find_all_files`` is modified in place: its
    element at index 1 is rewritten to be relative to the dataset root.
    NOTE(review): this assumes every entry is a mutable sequence whose
    index 1 holds the file path as a string -- confirm against
    ``utils.find_all_files``.
    """
    # Kept from the original script: the default output directory always exists.
    os.makedirs('filelists', exist_ok=True)

    # test
    root = expanduser(cfg.preprocess.datasets.LibriSpeech.root_val)
    testfiles = []
    print(f'Root: {root}')
    for subset in cfg.preprocess.datasets.LibriSpeech.testsets:
        files = find_all_files(join(root, subset), '.flac')
        print(f'Found {len(files)} flac files in {subset}')
        for entry in files:
            # Store paths relative to the dataset root.
            entry[1] = _strip_root(entry[1], root)
        testfiles.extend(files)

    out_path = cfg.preprocess.view.test_filelist
    print(f'Write test filelist to {out_path}')
    # The configured filelist may live outside 'filelists'; make sure its
    # parent directory exists before writing.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    utils.write_filelist(testfiles, out_path)
# Script entry point: preprocess() is called without arguments because the
# @hydra.main decorator builds the config and passes it in as ``cfg``.
if __name__ == '__main__':
    preprocess()
|