File size: 2,806 Bytes
29a525e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import argparse
import hparams
import utils
import multiresunet_model
import preprocess_data
import tensorflow as tf
import numpy as np
import librosa
import torchaudio
import torch
def _separate_band(y, model, band_params):
    """Separate one frequency band of the mixture and resynthesise it.

    The mixture is transformed with ``preprocess_data.forward_transform``,
    the transform is chunked and passed through ``model``, and the predicted
    magnitudes are recombined with the phase of the mixture before being
    inverted with ``preprocess_data.backward_transform``.

    Args:
        y: Mono mixture waveform as returned by ``librosa.load``.
        model: Loaded Keras model for this band.
        band_params: Mapping providing ``'min_f'``, ``'max_f'``,
            ``'bins_per_octave'`` and ``'gamma'`` for this band.

    Returns:
        A ``(magnitude, waveform)`` tuple: the predicted magnitudes cropped
        to the frame count of the mixture transform, and the time-domain
        estimate for this band.
    """
    min_f = band_params['min_f']
    max_f = band_params['max_f']
    bins_per_octave = band_params['bins_per_octave']
    gamma = band_params['gamma']

    ### Perform CQT transform on the audio ###
    coeffs, dc, nf = preprocess_data.forward_transform(
        y, min_f, max_f, bins_per_octave, gamma)

    # The two auxiliary outputs are zeroed in place before the inverse
    # transform, so nothing from them reaches the reconstruction.
    # NOTE(review): presumably these are the DC and Nyquist components of
    # the transform -- confirm against preprocess_data.
    dc[:] = 0
    nf[:] = 0

    mixture_phase = np.angle(coeffs)

    ### Batch Input ###
    chunks = preprocess_data.make_chunks(coeffs)

    ### Separate ###
    predicted = model.predict(chunks, batch_size=hparams.inference_batch_size)

    ### Reshape Model Output ###
    # Take index 0 of the last axis, lay the chunks side by side along
    # axis 1, then crop to the frame count of the mixture transform.
    magnitude = np.hstack(predicted[:, :, :, 0])[:, :mixture_phase.shape[-1]]

    # np.exp replaces the former ``np.math.e ** (...)``: the ``np.math``
    # alias was removed in NumPy 2.0.
    estimate = magnitude * np.exp(1j * mixture_phase)

    ### Inverse CQT transform using the mixture phase information ###
    y_hat = preprocess_data.backward_transform(
        estimate, dc, nf, y.shape[0], min_f, max_f, bins_per_octave, gamma)
    return magnitude, y_hat


def main():
    """Separate one source from an audio track and write it to a WAV file.

    Positional command-line arguments, in order: path to the audio track,
    name of the source to separate, path prefix of the saved models, and
    path prefix of the output. The models are read from
    ``<Model_path><Source>_lf.h5`` and ``<Model_path><Source>_hf.h5`` and the
    result is written to ``<Output_path><Source>.wav``. The prefixes are
    concatenated verbatim, so directory prefixes need a trailing separator.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('Path', metavar='path', type=str,
                        help='Path to audio track to be separated')
    parser.add_argument('Source', metavar='source', type=str,
                        help='Desired source to separate')
    parser.add_argument('Model_path', metavar='path_to_model', type=str,
                        help='Path to saved models')
    parser.add_argument('Output_path', metavar='output_path', type=str,
                        help='Output path for separated audio')

    ### Parse args ###
    args = parser.parse_args()
    path_to_audio = args.Path
    source = args.Source
    path_to_model = args.Model_path
    output_path = args.Output_path + source + '.wav'

    ### Load models ###
    model_lf = tf.keras.models.load_model(path_to_model + source + '_lf.h5')
    model_hf = tf.keras.models.load_model(path_to_model + source + '_hf.h5')

    ### Load audio track ###
    # ``sr`` must be passed by keyword: librosa >= 0.10 rejects it as a
    # positional argument.
    y, sr = librosa.load(path_to_audio, sr=hparams.sr, mono=True)

    ### Separate LF and HF, then sum the two band estimates ###
    mag_lf, y_lf_hat = _separate_band(y, model_lf, hparams.lf_params)
    mag_hf, y_hf_hat = _separate_band(y, model_hf, hparams.hf_params)
    y_hat = y_lf_hat + y_hf_hat

    print(mag_lf.shape)
    print(mag_hf.shape)
    print(y_lf_hat.shape)
    print(y_hf_hat.shape)

    # A leading axis of length 1 is added before saving.
    torchaudio.save(output_path,
                    torch.from_numpy(np.expand_dims(y_hat, 0)),
                    hparams.sr)


if __name__ == '__main__':
    main()
|