sayashi/vits-uma-genshin-honkai
#6
by
ZJH525835328
- opened
- .gitignore +1 -3
- app.py +0 -9
- monotonic_align/__init__.py +0 -20
- monotonic_align/core.py +0 -36
- monotonic_align/monotonic_align/core.cp38-win_amd64.pyd +0 -0
.gitignore
CHANGED
@@ -377,6 +377,4 @@ monotonic_align/core.c
|
|
377 |
/resources
|
378 |
/MoeGoe.spec
|
379 |
/dist/MoeGoe
|
380 |
-
/dist
|
381 |
-
|
382 |
-
.idea
|
|
|
377 |
/resources
|
378 |
/MoeGoe.spec
|
379 |
/dist/MoeGoe
|
380 |
+
/dist
|
|
|
|
app.py
CHANGED
@@ -11,18 +11,9 @@ import torch
|
|
11 |
from torch import no_grad, LongTensor
|
12 |
import webbrowser
|
13 |
import logging
|
14 |
-
import gradio.processing_utils as gr_processing_utils
|
15 |
logging.getLogger('numba').setLevel(logging.WARNING)
|
16 |
limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
|
17 |
|
18 |
-
audio_postprocess_ori = gr.Audio.postprocess
|
19 |
-
def audio_postprocess(self, y):
|
20 |
-
data = audio_postprocess_ori(self, y)
|
21 |
-
if data is None:
|
22 |
-
return None
|
23 |
-
return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
|
24 |
-
gr.Audio.postprocess = audio_postprocess
|
25 |
-
|
26 |
def get_text(text, hps):
|
27 |
text_norm, clean_text = text_to_sequence(text, hps.symbols, hps.data.text_cleaners)
|
28 |
if hps.data.add_blank:
|
|
|
11 |
from torch import no_grad, LongTensor
|
12 |
import webbrowser
|
13 |
import logging
|
|
|
14 |
logging.getLogger('numba').setLevel(logging.WARNING)
|
15 |
limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def get_text(text, hps):
|
18 |
text_norm, clean_text = text_to_sequence(text, hps.symbols, hps.data.text_cleaners)
|
19 |
if hps.data.add_blank:
|
monotonic_align/__init__.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
from numpy import zeros, int32, float32
|
2 |
-
from torch import from_numpy
|
3 |
-
|
4 |
-
from .core import maximum_path_jit
|
5 |
-
|
6 |
-
|
7 |
-
def maximum_path(neg_cent, mask):
|
8 |
-
""" numba optimized version.
|
9 |
-
neg_cent: [b, t_t, t_s]
|
10 |
-
mask: [b, t_t, t_s]
|
11 |
-
"""
|
12 |
-
device = neg_cent.device
|
13 |
-
dtype = neg_cent.dtype
|
14 |
-
neg_cent = neg_cent.data.cpu().numpy().astype(float32)
|
15 |
-
path = zeros(neg_cent.shape, dtype=int32)
|
16 |
-
|
17 |
-
t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(int32)
|
18 |
-
t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(int32)
|
19 |
-
maximum_path_jit(path, neg_cent, t_t_max, t_s_max)
|
20 |
-
return from_numpy(path).to(device=device, dtype=dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monotonic_align/core.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
import numba
|
2 |
-
|
3 |
-
|
4 |
-
@numba.jit(numba.void(numba.int32[:, :, ::1], numba.float32[:, :, ::1], numba.int32[::1], numba.int32[::1]),
|
5 |
-
nopython=True, nogil=True)
|
6 |
-
def maximum_path_jit(paths, values, t_ys, t_xs):
|
7 |
-
b = paths.shape[0]
|
8 |
-
max_neg_val = -1e9
|
9 |
-
for i in range(int(b)):
|
10 |
-
path = paths[i]
|
11 |
-
value = values[i]
|
12 |
-
t_y = t_ys[i]
|
13 |
-
t_x = t_xs[i]
|
14 |
-
|
15 |
-
v_prev = v_cur = 0.0
|
16 |
-
index = t_x - 1
|
17 |
-
|
18 |
-
for y in range(t_y):
|
19 |
-
for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
|
20 |
-
if x == y:
|
21 |
-
v_cur = max_neg_val
|
22 |
-
else:
|
23 |
-
v_cur = value[y - 1, x]
|
24 |
-
if x == 0:
|
25 |
-
if y == 0:
|
26 |
-
v_prev = 0.
|
27 |
-
else:
|
28 |
-
v_prev = max_neg_val
|
29 |
-
else:
|
30 |
-
v_prev = value[y - 1, x - 1]
|
31 |
-
value[y, x] += max(v_prev, v_cur)
|
32 |
-
|
33 |
-
for y in range(t_y - 1, -1, -1):
|
34 |
-
path[y, index] = 1
|
35 |
-
if index != 0 and (index == y or value[y - 1, index] < value[y - 1, index - 1]):
|
36 |
-
index = index - 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monotonic_align/monotonic_align/core.cp38-win_amd64.pyd
ADDED
Binary file (123 kB). View file
|
|