# -*- coding:utf-8 -*-
# @FileName :uvr.py
# @Time     :2023/8/2 10:47
# @Author   :lovemefan
# @Email    :lovemefan@outlook.com
import os.path

import librosa
import numpy as np
from tqdm import tqdm

from uvronnx.src.config import UVR_CONFIG
from uvronnx.src.ortInferSession import UVROrtInferSession
from uvronnx.src.utils import spec_utils
from uvronnx.src.utils.AudioHelper import AudioReader
from uvronnx.src.utils.spec_utils import make_padding


class UVRModel:
    def __init__(self, model_path=None):
        project_dir = os.path.dirname(os.path.dirname(__file__))
        model_path = model_path or os.path.join(project_dir, 'onnx/uvr-sim.onnx')
        assert os.path.exists(model_path), f"{model_path} does not exist"
        self.model = UVROrtInferSession({
            'model_path': model_path,
            'use_cuda': False
        })
        self.offset = 128
        self.window_size = 512

    @staticmethod
    def preprocess(x_spec):
        """Split a complex spectrogram into magnitude and phase."""
        x_mag = np.abs(x_spec)
        x_phase = np.angle(x_spec)
        return x_mag, x_phase

    def separate_offline(self, mixed_audio, sample_rate=44100):
        """Separate a mix into (instrument, vocal) waveforms."""
        if isinstance(mixed_audio, str):
            mixed_audio, sample_rate = AudioReader.read_wav_file(mixed_audio)

        x_wave, x_spec_s = {}, {}
        bands_n = len(UVR_CONFIG['band'])

        # Build a multi-band spectrogram, from the highest band down.
        for d in range(bands_n, 0, -1):
            bp = UVR_CONFIG['band'][d]
            if d == bands_n:
                # Highest band: use the input as-is, duplicating mono to stereo.
                x_wave[d] = mixed_audio
                if x_wave[d].ndim == 1:
                    x_wave[d] = np.asfortranarray([x_wave[d], x_wave[d]])
            else:
                # Lower bands: resample down from the band above.
                x_wave[d] = librosa.resample(
                    x_wave[d + 1],
                    orig_sr=UVR_CONFIG['band'][d + 1]['sr'],
                    target_sr=bp['sr'],
                    res_type=bp['res_type'])

            # STFT of the wave source for this band.
            x_spec_s[d] = spec_utils.wave_to_spectrogram_mt(
                x_wave[d], bp['hl'], bp['n_fft'],
                UVR_CONFIG['mid_side'],
                UVR_CONFIG['mid_side_b2'],
                UVR_CONFIG['reverse'])

            if d == bands_n:
                # Keep the spectrum above the crop point so the high end can
                # be restored by mirroring after inference.
                input_high_end_h = (bp['n_fft'] // 2 - bp['crop_stop']) + (
                    UVR_CONFIG['pre_filter_stop'] - UVR_CONFIG['pre_filter_start'])
                input_high_end = x_spec_s[d][
                    :, bp['n_fft'] // 2 - input_high_end_h:bp['n_fft'] // 2, :]

        x_spec_m = spec_utils.combine_spectrograms(x_spec_s, UVR_CONFIG)

        x_mag, x_phase = self.preprocess(x_spec_m)

        # Normalize the magnitude to [0, 1] before inference.
        coef = x_mag.max()
        x_mag_pre = x_mag / coef

        n_frame = x_mag_pre.shape[2]
        pad_l, pad_r, roi_size = make_padding(n_frame, self.window_size, self.offset)
        n_window = int(np.ceil(n_frame / roi_size))

        x_mag_pad = np.pad(
            x_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')

        # Run the model over sliding windows, discarding the overlapping
        # margins (self.offset frames) on both sides of each prediction.
        preds = []
        for i in tqdm(range(n_window)):
            start = i * roi_size
            x_mag_window = x_mag_pad[None, :, :, start:start + self.window_size]
            h = self.model(x_mag_window)
            pred = h[:, :, :, self.offset:-self.offset]
            assert pred.shape[3] > 0
            preds.append(pred[0])

        pred = np.concatenate(preds, axis=2)
        pred = pred[:, :, :n_frame]

        # Undo the normalization and re-attach the original phase.
        pred = pred * coef
        y_spec_m = pred * np.exp(1.j * x_phase)
        # The vocal spectrogram is the residual of the instrument estimate.
        v_spec_m = x_spec_m - y_spec_m

        input_high_end_ = spec_utils.mirroring(
            'mirroring', y_spec_m, input_high_end, UVR_CONFIG)
        wav_instrument = spec_utils.cmb_spectrogram_to_wave(
            y_spec_m, UVR_CONFIG, input_high_end_h, input_high_end_)
        print('instruments done')

        input_high_end_ = spec_utils.mirroring(
            'mirroring', v_spec_m, input_high_end, UVR_CONFIG)
        wav_vocals = spec_utils.cmb_spectrogram_to_wave(
            v_spec_m, UVR_CONFIG, input_high_end_h, input_high_end_)

        return wav_instrument, wav_vocals


if __name__ == '__main__':
    model = UVRModel()
    audio, sample_rate = AudioReader.read_wav_file(
        '/Users/cenglingfan/Downloads/晴天.wav_-4key_fumin.wav')
    instrument, vocal = model.separate_offline(audio, sample_rate)
    print(instrument)
    print(vocal)
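
    # Hedged usage sketch (not part of the original script): persist the two
    # separated stems with the third-party `soundfile` package. This assumes
    # soundfile is installed, that the stems come back as float arrays shaped
    # (channels, samples) -- hence the transpose, since soundfile expects
    # (samples, channels) -- and that 44100 Hz output names/rate are fine to
    # illustrate; adjust to how spec_utils actually lays out its output.
    import soundfile as sf
    sf.write('instrument.wav', np.asarray(instrument).T, sample_rate)
    sf.write('vocal.wav', np.asarray(vocal).T, sample_rate)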