File size: 2,870 Bytes
1e4a2ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import gc
import sys
import torch
import librosa

import numpy as np
import torch.nn.functional as F

sys.path.append(os.getcwd())

from main.library import opencl

def autotune_f0(note_dict, f0, f0_autotune_strength):
    """Move every value of ``f0`` toward its nearest entry in ``note_dict``.

    Args:
        note_dict: Iterable of candidate values; the one with the smallest
            absolute difference to each ``f0`` value is selected (the first
            one wins on a tie). Presumably note frequencies in Hz - confirm
            against the caller.
        f0: Sequence of values to correct.
        f0_autotune_strength: Fraction of the distance to the nearest
            candidate that is applied: 0 leaves a value unchanged, 1 replaces
            it with the candidate.

    Returns:
        An array created with ``np.zeros_like(f0)`` holding the corrected
        values.
    """
    corrected = np.zeros_like(f0)

    for index, current in enumerate(f0):
        # Bind the current value as a default so the key function is
        # self-contained for this iteration.
        def distance(candidate, reference=current):
            return abs(candidate - reference)

        nearest = min(note_dict, key=distance)
        offset = nearest - current
        corrected[index] = current + offset * f0_autotune_strength

    return corrected

def change_rms(source_audio, source_rate, target_audio, target_rate, rate):
    """Scale ``target_audio`` so its RMS envelope is blended toward the source's.

    For each signal an RMS envelope is computed with ``librosa.feature.rms``
    using a frame of ``rate // 2 * 2`` samples and a hop of ``rate // 2``
    samples, then linearly interpolated to ``target_audio.shape[0]`` points.
    The per-sample gain is ``source_rms ** (1 - rate) * target_rms ** (rate - 1)``,
    so ``rate == 1`` yields a gain of 1 and ``rate == 0`` yields
    ``source_rms / target_rms``.

    Args:
        source_audio: Signal whose loudness envelope is the reference.
        source_rate: Sample rate used to size the RMS frames of the source.
        target_audio: Signal to rescale; its first dimension sets the output
            length.
        target_rate: Sample rate used to size the RMS frames of the target.
        rate: Blend exponent described above.

    Returns:
        ``target_audio`` multiplied by the gain (a new array; the input is not
        modified in place).
    """
    n_samples = target_audio.shape[0]

    def _envelope(audio, sample_rate):
        # RMS per frame, stretched to one value per target sample.
        half = sample_rate // 2
        frames = librosa.feature.rms(y=audio, frame_length=half * 2, hop_length=half)
        stretched = F.interpolate(
            torch.from_numpy(frames).float().unsqueeze(0),
            size=n_samples,
            mode="linear",
        )
        return stretched.squeeze()

    target_rms = _envelope(target_audio, target_rate)
    source_rms = _envelope(source_audio, source_rate)

    # Floor the target envelope so the negative exponent never sees zero.
    floor = torch.zeros_like(target_rms) + 1e-6
    safe_target_rms = torch.maximum(target_rms, floor)

    gain = torch.pow(source_rms, 1 - rate) * torch.pow(safe_target_rms, rate - 1)
    return target_audio * gain.numpy()

def clear_gpu_cache():
    """Run the garbage collector, then empty one accelerator cache.

    Backends are probed in the order CUDA, MPS, OpenCL; only the first one
    that reports itself available has its cache emptied.
    """
    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        return

    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
        return

    if opencl.is_available():
        opencl.pytorch_ocl.empty_cache()

def extract_median_f0(f0):
    """Return the median of ``f0`` after filling zero frames by interpolation.

    Zero entries are treated as missing: they are replaced with NaN and then
    filled by linear interpolation between the surrounding non-zero entries
    (``np.interp`` holds the first/last known value at the edges). The median
    is taken over the filled contour, so filled frames count toward it.

    Args:
        f0: Array of f0 values in which 0 marks a frame without a value.

    Returns:
        The median of the filled contour as a Python ``float``.

    Raises:
        ValueError: If ``f0`` has no non-zero (non-NaN) entry, including the
            empty case. ``np.interp`` raised ``ValueError`` here before as
            well, only with a message about empty sample points.
    """
    f0 = np.where(f0 == 0, np.nan, f0)
    known = ~np.isnan(f0)

    # Without at least one known frame there is nothing to interpolate from.
    if not known.any():
        raise ValueError("f0 contains no non-zero frames to compute a median from")

    filled = np.interp(np.arange(len(f0)), np.where(known)[0], f0[known])
    return float(np.median(filled))

def proposal_f0_up_key(f0, target_f0 = 155.0, limit = 12):
    """Suggest a shift that moves the median of ``f0`` toward ``target_f0``.

    The shift is ``12 * log2(target_f0 / median)`` rounded to the nearest
    integer and clamped to ``[-limit, limit]``; with 12 steps per doubling
    this corresponds to semitones.

    Args:
        f0: f0 contour passed to ``extract_median_f0``.
        target_f0: Value the median should be moved toward.
        limit: Largest absolute shift returned.

    Returns:
        The clamped shift.
    """
    median_f0 = extract_median_f0(f0)
    semitones = int(np.round(12 * np.log2(target_f0 / median_f0)))

    # Clamp from above first, then from below.
    capped = min(limit, semitones)
    return max(-limit, capped)

def get_onnx_argument(net_g, feats, p_len, sid, pitch, pitchf, energy, pitch_guidance, energy_use):
    """Build the input-name -> array mapping fed to ``net_g``.

    Keys are taken positionally from ``net_g.get_inputs()[i].name``
    (presumably an ONNX Runtime session - confirm against the caller).

    Positions 0-3 are always filled:
        0: ``feats`` as float32
        1: ``p_len`` as a NumPy array
        2: ``sid`` wrapped in a one-element int64 array
        3: standard-normal noise of shape ``(1, 192, p_len)`` as float32

    From position 4 onward the optional inputs follow, in this order:
    ``pitch`` (int64) and ``pitchf`` (float32) when ``pitch_guidance`` is
    truthy, then ``energy`` (float32) when ``energy_use`` is truthy.

    Returns:
        The dictionary described above.
    """
    def _name_at(position):
        return net_g.get_inputs()[position].name

    def _as_array(tensor, dtype):
        return tensor.cpu().numpy().astype(dtype)

    inputs = {
        _name_at(0): _as_array(feats, np.float32),
        _name_at(1): p_len.cpu().numpy(),
        _name_at(2): np.array([sid.cpu().item()], dtype=np.int64),
        _name_at(3): np.random.randn(1, 192, p_len).astype(np.float32),
    }

    # Optional inputs occupy consecutive positions starting at 4.
    optional = []
    if pitch_guidance:
        optional.append((pitch, np.int64))
        optional.append((pitchf, np.float32))
    if energy_use:
        optional.append((energy, np.float32))

    for position, (tensor, dtype) in enumerate(optional, start=4):
        # Resolve the input name before converting the tensor.
        key = _name_at(position)
        inputs[key] = _as_array(tensor, dtype)

    return inputs