# norae / docker /vocalsplit /augment.py
# kopyl's picture
# Upload folder using huggingface_hub
# abf6bf2 verified
import argparse
import os
import subprocess
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm
from lib import dataset
from lib import spec_utils
if __name__ == '__main__':
    # Pitch-shift augmentation script.
    #
    # For every (mixture, instrumental) pair, the instrumental and the
    # residual (mixture minus instrumental) are pitch-shifted with the
    # external `soundstretch` command-line tool, re-summed into a new mixture,
    # converted to spectrograms and cached as .npy files inside the dataset
    # directories.
    p = argparse.ArgumentParser()
    p.add_argument('--sr', '-r', type=int, default=44100)
    p.add_argument('--hop_length', '-l', type=int, default=1024)
    p.add_argument('--n_fft', '-f', type=int, default=2048)
    # Passed verbatim to soundstretch's -pitch option.
    p.add_argument('--pitch', '-p', type=int, default=-1)
    p.add_argument('--mixtures', '-m', required=True)
    p.add_argument('--instruments', '-i', required=True)
    args = p.parse_args()

    # Scratch WAV files exchanged with soundstretch. They live in the current
    # working directory and are keyed by pitch only.
    input_i = 'input_i_{}.wav'.format(args.pitch)
    input_v = 'input_v_{}.wav'.format(args.pitch)
    output_i = 'output_i_{}.wav'.format(args.pitch)
    output_v = 'output_v_{}.wav'.format(args.pitch)
    tmp_paths = (input_i, input_v, output_i, output_v)

    # Commands are argv lists rather than a single string: without shell=True
    # a string is treated as the program name on POSIX and the call fails.
    pitch_opt = '-pitch={}'.format(args.pitch)
    cmd_i = ['soundstretch', input_i, output_i, pitch_opt]
    cmd_v = ['soundstretch', input_v, output_v, pitch_opt]

    # Cache layout: <dataset dir>/sr{sr}_hl{hop}_nf{n_fft}/<name>_pitch{p}.npy
    cache_suffix = '_pitch{}.npy'.format(args.pitch)
    cache_dir = 'sr{}_hl{}_nf{}'.format(args.sr, args.hop_length, args.n_fft)
    mix_cache_dir = os.path.join(args.mixtures, cache_dir)
    inst_cache_dir = os.path.join(args.instruments, cache_dir)
    os.makedirs(mix_cache_dir, exist_ok=True)
    os.makedirs(inst_cache_dir, exist_ok=True)

    # NOTE(review): make_pair presumably yields (mixture_path,
    # instrumental_path) tuples -- confirm against lib.dataset.
    filelist = dataset.make_pair(args.mixtures, args.instruments)
    for mix_path, inst_path in tqdm(filelist):
        mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
        mix_cache_path = os.path.join(
            mix_cache_dir, mix_basename + cache_suffix)

        inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
        inst_cache_path = os.path.join(
            inst_cache_dir, inst_basename + cache_suffix)

        # Skip pairs whose two cache files both already exist.
        if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
            continue

        X, _ = librosa.load(
            mix_path, sr=args.sr, mono=False, dtype=np.float32,
            res_type='kaiser_fast')
        y, _ = librosa.load(
            inst_path, sr=args.sr, mono=False, dtype=np.float32,
            res_type='kaiser_fast')

        X, y = spec_utils.align_wave_head_and_tail(X, y, args.sr)
        # Residual of the mixture once the instrumental is subtracted
        # (presumably the vocal stem, judging by the variable names).
        v = X - y

        try:
            sf.write(input_i, y.T, args.sr)
            sf.write(input_v, v.T, args.sr)

            # check=True: abort instead of silently continuing with missing
            # or stale output files when soundstretch fails.
            subprocess.run(cmd_i, stderr=subprocess.DEVNULL, check=True)
            subprocess.run(cmd_v, stderr=subprocess.DEVNULL, check=True)

            y, _ = librosa.load(
                output_i, sr=args.sr, mono=False, dtype=np.float32,
                res_type='kaiser_fast')
            v, _ = librosa.load(
                output_v, sr=args.sr, mono=False, dtype=np.float32,
                res_type='kaiser_fast')
            # Rebuild the mixture from the two pitch-shifted stems.
            X = y + v

            spec = spec_utils.wave_to_spectrogram(
                X, args.hop_length, args.n_fft)
            np.save(mix_cache_path, spec)

            spec = spec_utils.wave_to_spectrogram(
                y, args.hop_length, args.n_fft)
            np.save(inst_cache_path, spec)
        finally:
            # Always clean up scratch files, even when a step above raised;
            # tolerate files that were never created.
            for tmp_path in tmp_paths:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)