# Source: Hugging Face Space "AnhP/RVC-GUI" (status: Running),
# file size 8,003 bytes, revision e4d8df5.

import numpy as np

from numpy.lib.stride_tricks import sliding_window_view

def istft(frames, framesize, hopsize):
    """Rebuild a time-domain signal from spectral frames by windowed overlap-add.

    Args:
        frames: Spectral frames, one per row; a 1-D input is treated as a
            single frame. The caller's array is left untouched.
        framesize: Scalar window size, or a sequence whose first element is
            the analysis window size and whose last element is the synthesis
            window size.
        hopsize: Step, in samples, between the start of consecutive frames.

    Returns:
        1-D float array of ``len(frames) * hopsize + analysis_window_size``
        samples.
    """
    # np.atleast_2d can hand back a view of the caller's array; copy so that
    # zeroing the first and last bins below does not leak out of this function.
    frames = np.atleast_2d(frames).copy()
    assert frames.ndim == 2

    analysis_window_size = np.ravel(framesize)[0]
    synthesis_window_size = np.ravel(framesize)[-1]

    assert analysis_window_size >= synthesis_window_size

    if analysis_window_size != synthesis_window_size:
        A = asymmetric_analysis_window(analysis_window_size, synthesis_window_size)
        S = asymmetric_synthesis_window(analysis_window_size, synthesis_window_size)
    else:
        A = symmetric_window(analysis_window_size)
        S = symmetric_window(synthesis_window_size)

    # Synthesis window scaled by hopsize / sum(A * S) to compensate for the
    # combined analysis/synthesis weighting of the overlapping frames.
    W = S * hopsize / np.sum(A * S)
    N = frames.shape[0] * hopsize + analysis_window_size

    y = np.zeros((N), float)

    # Discard the first and last frequency bin of every frame.
    frames[:,  0] = 0
    frames[:, -1] = 0

    # Writeable strided view: each row aliases one hop-spaced window of y.
    frames0 = sliding_window_view(y, analysis_window_size, writeable=True)[::hopsize]
    frames1 = np.fft.irfft(frames, axis=-1, norm='forward') * W

    # The rows of frames0 overlap in memory, so accumulate frame by frame.
    for i in range(min(len(frames0), len(frames1))):
        frames0[i] += frames1[i]

    return y

def asymmetric_synthesis_window(analysis_window_size, synthesis_window_size):
    """Build the synthesis window paired with the asymmetric analysis window.

    The result has ``analysis_window_size`` samples. Only the last
    ``2 * (synthesis_window_size // 2)`` samples are non-zero: the first half
    of that span is the squared rising half of the short symmetric window
    divided by the matching samples of the long symmetric window, the second
    half is the falling half of the short symmetric window.
    """
    total = analysis_window_size
    half = synthesis_window_size // 2

    start = total - half - half
    stop = total - half

    short = symmetric_window(2 * half)
    long = symmetric_window(2 * total - 2 * half)

    result = np.zeros(total)
    result[start:stop] = np.square(short[:half]) / long[start:stop]
    result[-half:] = short[-half:]

    return result

def asymmetric_analysis_window(analysis_window_size, synthesis_window_size):
    """Build an analysis window with a long rise and a short fall.

    The result has ``analysis_window_size`` samples. With
    ``m = synthesis_window_size // 2``, the first ``n - m`` samples are the
    leading samples of a symmetric window of length ``2 * (n - m)`` and the
    last ``m`` samples are the trailing samples of a symmetric window of
    length ``2 * m``.
    """
    total = analysis_window_size
    half = synthesis_window_size // 2
    rise = total - half

    result = np.zeros(total)
    result[:rise] = symmetric_window(2 * total - 2 * half)[:rise]
    result[-half:] = symmetric_window(2 * half)[-half:]

    return result

def symmetric_window(symmetric_window_size):
    """Return ``0.5 - 0.5 * cos(2 * pi * k / n)`` for ``k = 0 .. n - 1``.

    ``n`` is ``symmetric_window_size``; the result is a 1-D array of ``n``
    samples that starts at zero.
    """
    indices = np.arange(symmetric_window_size)
    phase = 2 * np.pi * indices / symmetric_window_size

    return 0.5 - 0.5 * np.cos(phase)

def stft(x, framesize, hopsize):
    """Compute windowed real-input DFT frames of a 1-D signal.

    Args:
        x: Input signal; must be 1-D after ``np.atleast_1d``.
        framesize: Scalar window size, or a sequence whose first element is
            the analysis window size and whose last element is the synthesis
            window size.
        hopsize: Step, in samples, between the start of consecutive frames.

    Returns:
        2-D complex array with one ``rfft`` spectrum (``norm='forward'``) per
        hop-spaced window of ``x``.
    """
    signal = np.atleast_1d(x)
    assert signal.ndim == 1

    sizes = np.ravel(framesize)
    analysis_window_size, synthesis_window_size = sizes[0], sizes[-1]

    assert analysis_window_size >= synthesis_window_size

    if analysis_window_size == synthesis_window_size:
        window = symmetric_window(analysis_window_size)
    else:
        window = asymmetric_analysis_window(analysis_window_size, synthesis_window_size)

    # Read-only strided view of every window, thinned to one per hop.
    segments = sliding_window_view(signal, analysis_window_size, writeable=False)[::hopsize]

    return np.fft.rfft(segments * window, axis=-1, norm='forward')

def normalize(frames, frames0):
    """Rescale the real part of each frame to match a reference frame's energy.

    For every row, the real part of ``frames`` is multiplied by
    ``sqrt(sum(real(frames0)^2) / sum(real(frames)^2))``; the imaginary part
    is kept as is. Rows whose real part has zero energy are left unchanged.
    ``frames`` is modified in place and also returned.
    """
    for index, frame in enumerate(frames):
        target = np.real(frames0[index])
        current = np.real(frame)

        target_energy = np.dot(target, target)
        current_energy = np.dot(current, current)

        # Nothing to scale (and no safe ratio) when the frame is silent.
        if current_energy == 0:
            continue

        gain = np.sqrt(target_energy / current_energy)
        frames[index] = current * gain + 1j * np.imag(frame)

    return frames

def lowpass(cepstrum, quefrency):
    """Keep the low-quefrency part of a cepstrum, in place.

    Entries ``1 .. quefrency - 1`` are doubled, entry ``0`` and entry
    ``quefrency`` are kept, and everything after ``quefrency`` is set to
    zero. The same ``cepstrum`` object is returned.
    """
    doubled = slice(1, quefrency)
    discarded = slice(quefrency + 1, None)

    cepstrum[doubled] *= 2
    cepstrum[discarded] = 0

    return cepstrum

def lifter(frames, quefrency):
    """Estimate a smoothed spectral envelope for every frame.

    For each frame, the base-10 logarithm of its real part is transformed
    with ``irfft``, filtered with ``lowpass(..., quefrency)``, transformed
    back with ``rfft`` and exponentiated (``10 ** real(...)``).

    Returns:
        Float array with the same shape as ``frames``.
    """
    envelopes = np.zeros(frames.shape)

    for row, frame in enumerate(frames):
        # log10 of zero or negative values is expected here; silence the
        # resulting warnings and let -inf / nan propagate.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_spectrum = np.log10(np.real(frame))

        cepstrum = np.fft.irfft(log_spectrum, norm='forward')
        smoothed = np.fft.rfft(lowpass(cepstrum, quefrency), norm='forward')

        envelopes[row] = np.power(10, np.real(smoothed))

    return envelopes

def resample(x, factor):
    """Stretch or compress an array along its index axis by linear interpolation.

    The output has the same shape and dtype as ``x`` (expected to be 1-D).
    With ``n = len(x)`` and ``m = int(n * factor)``, output sample ``i`` for
    ``i < min(n, m)`` is interpolated from input position ``i * n / m``.
    Output samples that are not reached, or whose source position falls on
    or beyond the last input sample, stay zero.

    Args:
        x: Input array.
        factor: Scaling factor; ``1`` returns an unmodified copy.

    Returns:
        New array; ``x`` is never modified.
    """
    if factor == 1: return x.copy()
    y = np.zeros(x.shape, dtype=x.dtype)

    n = len(x)
    m = int(n * factor)

    # Empty input or a factor small enough to collapse the signal to zero
    # samples: there is nothing to interpolate, and n / m would divide by
    # zero. A negative m already produced all zeros, so treat both alike.
    if m <= 0: return y

    i = np.arange(min(n, m))
    k = i * (n / m)

    # Split the source position into integer index j and fraction k.
    j = np.trunc(k).astype(int)
    k = k - j

    # Interpolation needs both x[j] and x[j + 1] to exist.
    ok = (0 <= j) & (j < n - 1)
    y[i[ok]] = k[ok] * x[j[ok] + 1] + (1 - k[ok]) * x[j[ok]]

    return y

def shiftpitch(frames, factors, samplerate):
    """Shift every frame by each factor and keep the strongest candidate per bin.

    Each frame's real part (magnitudes) and imaginary part (frequencies) are
    resampled once per factor, with the frequencies additionally multiplied
    by that factor. Candidates whose frequency is ``<= 0`` or
    ``>= samplerate / 2`` get zero magnitude. For every bin the candidate
    with the largest magnitude is written back as
    ``magnitude + 1j * frequency``.

    ``frames`` is modified in place and also returned.
    """
    for index, frame in enumerate(frames):
        source_magnitudes = np.real(frame)
        source_frequencies = np.imag(frame)

        magnitudes = np.vstack([resample(source_magnitudes, factor) for factor in factors])
        frequencies = np.vstack([resample(source_frequencies, factor) * factor for factor in factors])

        out_of_band = (frequencies <= 0) | (frequencies >= samplerate / 2)
        magnitudes[out_of_band] = 0

        # Row index of the loudest candidate for each bin, shaped (1, bins).
        loudest = np.argmax(magnitudes, axis=0)[None, :]

        best_magnitudes = np.take_along_axis(magnitudes, loudest, axis=0)
        best_frequencies = np.take_along_axis(frequencies, loudest, axis=0)

        frames[index] = best_magnitudes + 1j * best_frequencies

    return frames

def wrap(x):
    """Map ``x`` into the half-open interval ``[-pi, pi)`` modulo ``2 * pi``."""
    full_turn = 2 * np.pi
    shifted = x + np.pi

    return shifted % full_turn - np.pi

def encode(frames, framesize, hopsize, samplerate):
    """Convert complex DFT frames to ``magnitude + 1j * frequency`` frames.

    For each bin, the phase advance since the previous frame is compared with
    the advance expected for the bin's centre frequency
    (``bin * 2 * pi * hopsize / analysis_framesize``); the wrapped deviation
    refines the bin index, which is then scaled by
    ``samplerate / analysis_framesize``. The first frame is compared against
    an all-zero phase.

    Args:
        frames: 2-D complex array, one DFT frame per row.
        framesize: Scalar window size, or a sequence whose first element is
            the analysis window size.
        hopsize: Step, in samples, between consecutive frames.
        samplerate: Sample rate; frequencies are returned in the same unit.

    Returns:
        New complex array of the same shape: real part ``abs(frame)``,
        imaginary part the estimated frequency.
    """
    M, N = frames.shape
    analysis_framesize = np.ravel(framesize)[0]

    freqinc = samplerate / analysis_framesize
    phaseinc = 2 * np.pi * hopsize / analysis_framesize

    bins = np.arange(N)
    previous = np.zeros(N)
    data = np.zeros((M, N), complex)

    for m, frame in enumerate(frames):
        phase = np.angle(frame)

        # Take the difference BEFORE remembering the current phase; storing
        # it first would make the difference identically zero.
        delta = phase - previous
        previous = phase

        deviation = wrap(delta - bins * phaseinc) / phaseinc
        freq = (bins + deviation) * freqinc

        data[m] = np.abs(frame) + 1j * freq

    return data

def decode(frames, framesize, hopsize, samplerate):
    """Convert ``magnitude + 1j * frequency`` frames back to complex DFT frames.

    Each bin's frequency is turned into a per-frame phase advance, the
    advances are accumulated over the frames, and the result is combined with
    the magnitude as ``magnitude * exp(1j * phase)``. When the synthesis
    window size differs from the analysis window size, a per-bin phase offset
    of ``2 * pi * synthesis_framesize * bin / N`` is subtracted.

    Args:
        frames: 2-D complex array; real part magnitudes, imaginary part
            frequencies.
        framesize: Scalar window size, or a sequence whose first element is
            the analysis window size and whose last element is the synthesis
            window size.
        hopsize: Step, in samples, between consecutive frames.
        samplerate: Sample rate, in the same unit as the frequencies.

    Returns:
        New complex array with the same shape as ``frames``.
    """
    M, N = frames.shape

    sizes = np.ravel(framesize)
    analysis_framesize, synthesis_framesize = sizes[0], sizes[-1]

    freqinc = samplerate / analysis_framesize
    phaseinc = 2 * np.pi * hopsize / analysis_framesize

    bins = np.arange(N)

    if synthesis_framesize != analysis_framesize:
        timeshift = 2 * np.pi * synthesis_framesize * bins / N
    else:
        timeshift = 0

    phase = np.zeros(N)
    data = np.zeros((M, N), complex)

    for m, frame in enumerate(frames):
        deviation = (np.imag(frame) - bins * freqinc) / freqinc

        # Running phase per bin, carried over from frame to frame.
        phase += (bins + deviation) * phaseinc

        data[m] = np.real(frame) * np.exp(1j * (phase - timeshift))

    return data

class StftPitchShift:
    """Pitch shifter built on the module's STFT encode/shift/decode helpers."""

    def __init__(self, framesize, hopsize, samplerate):
        """Store the frame size, hop size and sample rate used for every call."""
        self.framesize, self.hopsize, self.samplerate = framesize, hopsize, samplerate

    def shiftpitch(self, input, factors=1, quefrency=0, distortion=1, normalization=False):
        """Pitch-shift a mono signal and return it with the input's shape and dtype.

        Args:
            input: Signal that is 1-D after squeezing; integer or floating
                dtype. Integer input is mapped from its full dtype range to
                ``[-1, 1]`` for processing and mapped back afterwards.
            factors: One or more pitch factors passed to the module-level
                ``shiftpitch``.
            quefrency: Multiplied by the sample rate and truncated to an
                int; a non-zero result enables envelope extraction via
                ``lifter`` before shifting and re-application afterwards.
            distortion: When not ``1`` (and envelopes are in use), each
                envelope is resampled by this factor before re-application.
            normalization: When truthy, frames are passed through
                ``normalize`` against a copy taken before shifting.

        Returns:
            Array with the same shape and dtype as ``input``.

        Raises:
            ValueError: If the squeezed input is not 1-D.
            TypeError: If the dtype is neither integer nor floating.
        """
        signal = np.atleast_1d(input)
        dtype, shape = signal.dtype, signal.shape

        signal = np.squeeze(signal)
        if signal.ndim != 1:
            raise ValueError('input.ndim != 1')

        is_integer = np.issubdtype(dtype, np.integer)

        if is_integer:
            limits = np.iinfo(dtype)
            lowest, highest = limits.min, limits.max
            signal = ((signal.astype(float) - lowest) / (highest - lowest)) * 2 - 1
        elif not np.issubdtype(dtype, np.floating):
            raise TypeError('not np.issubdtype(dtype, np.floating)')

        def isnotnormal(values):
            # True where a value is inf, nan, or smaller in magnitude than
            # the smallest normal float of its dtype.
            tiny = np.finfo(values.dtype).tiny
            return (np.isinf(values)) | (np.isnan(values)) | (abs(values) < tiny)

        framesize, hopsize, samplerate = self.framesize, self.hopsize, self.samplerate

        factors = np.asarray(factors).flatten()
        quefrency = int(quefrency * samplerate)

        frames = encode(stft(signal, framesize, hopsize), framesize, hopsize, samplerate)

        if normalization:
            reference = frames.copy()

        if not quefrency:
            frames = shiftpitch(frames, factors, samplerate)
        else:
            envelopes = lifter(frames, quefrency)
            unusable = isnotnormal(envelopes)

            # Remove the envelope before shifting; blank bins with no usable envelope.
            frames.real /= envelopes
            frames.real[unusable] = 0

            if distortion != 1:
                envelopes[unusable] = 0

                for row, envelope in enumerate(envelopes):
                    envelopes[row] = resample(envelope, distortion)

                unusable = isnotnormal(envelopes)

            frames = shiftpitch(frames, factors, samplerate)

            # Put the (possibly resampled) envelope back.
            frames.real *= envelopes
            frames.real[unusable] = 0

        if normalization:
            frames = normalize(frames, reference)

        output = istft(decode(frames, framesize, hopsize, samplerate), framesize, hopsize)

        # Trim or zero-pad in place to the original shape.
        output.resize(shape, refcheck=False)

        if is_integer:
            output = (((output + 1) / 2) * (highest - lowest) + lowest).clip(lowest, highest).astype(dtype)
        elif output.dtype != dtype:
            output = output.astype(dtype)

        assert output.dtype == dtype
        assert output.shape == shape

        return output