# speecht5-tts/manifest/utils/resample_libritts.py
# wr
# set *.tsv and *.txt to large file
# 31ad50e
from pathlib import Path
from shutil import copyfile
import soundfile as sf
import librosa
import os
# Source dataset: LibriTTS (size on disk per subset)
# 1.6G /root/data/libritts/LibriTTS/dev-clean
# 1.5G /root/data/libritts/LibriTTS/test-clean
# 9.1G /root/data/libritts/LibriTTS/train-clean-100
# 33G /root/data/libritts/LibriTTS/train-clean-360
# 44G /root/data/libritts/LibriTTS
# Output dataset: LibriTTS_16k (same directory layout, audio at 16 kHz)
# The pattern "**" means all subdirectories recursively, so "**/*" matches
# every file and directory under the source root. Files ending in "wav" are
# resampled to 16 kHz; every other file is copied unchanged.
# Mirror the LibriTTS tree into LibriTTS_16k, preserving the directory layout:
# every ".wav" file is resampled to 16 kHz and every other file is copied
# unchanged.
src_dir = Path("/root/data/libritts/LibriTTS")
dest_dir = Path("/root/data/libritts/LibriTTS_16k")
target_sr = 16000

dest_dir.mkdir(parents=True, exist_ok=True)
for src_path in src_dir.glob("**/*"):
    if not src_path.is_file():  # Skip directories
        continue

    # Build the destination from the path relative to the source root. A plain
    # string replace of "LibriTTS" would also rewrite any other occurrence of
    # that substring further down the path.
    new_path = dest_dir / src_path.relative_to(src_dir)

    # Create the parent directory without going through a shell, so paths
    # containing spaces or shell metacharacters are handled correctly.
    new_path.parent.mkdir(parents=True, exist_ok=True)

    if src_path.suffix == ".wav":
        audio, fs = sf.read(str(src_path))
        # The sample rates are passed by keyword: they are keyword-only in
        # librosa >= 0.10, and keywords are accepted by older releases too.
        # NOTE(review): resampling runs along the last axis, so this assumes
        # mono input (1-D array from sf.read) -- confirm for other datasets.
        x = librosa.resample(audio, orig_sr=fs, target_sr=target_sr)
        sf.write(str(new_path), x, target_sr)
    else:
        copyfile(str(src_path), str(new_path))