Spaces:
Build error
Build error
fix build error
#2
by
johntito
- opened
- .gitignore +2 -0
- api.py +76 -0
- api_example.py +47 -0
- app.py +113 -60
- monotonic_align/.gitignore +161 -0
- monotonic_align/LICENSE.md +22 -0
- monotonic_align/PKG-INFO +32 -0
- monotonic_align/README.md +16 -0
- monotonic_align/__init__.py +0 -19
- monotonic_align/__pycache__/__init__.cpython-39.pyc +0 -0
- monotonic_align/build/temp.win-amd64-3.9/Release/core.cp39-win_amd64.exp +0 -0
- monotonic_align/build/temp.win-amd64-3.9/Release/core.cp39-win_amd64.lib +0 -0
- monotonic_align/build/temp.win-amd64-3.9/Release/core.obj +0 -0
- monotonic_align/monotonic_align/core.cp39-win_amd64.pyd +0 -0
- monotonic_align/pyproject.toml +33 -0
- monotonic_align/setup.cfg +4 -0
- monotonic_align/setup.py +7 -6
- monotonic_align/{core.c → src/core.c} +0 -0
- monotonic_align/{core.pyx → src/core.pyx} +1 -0
- requirements.txt +17 -13
- utils.py +7 -5
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
myenv
|
2 |
+
venv
|
api.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from pydantic import BaseModel
|
4 |
+
import numpy as np
|
5 |
+
import base64
|
6 |
+
import io
|
7 |
+
from scipy.io.wavfile import write
|
8 |
+
import sounddevice as sd
|
9 |
+
|
10 |
+
# 自定义模块
|
11 |
+
import commons
|
12 |
+
import utils
|
13 |
+
from models import SynthesizerTrn
|
14 |
+
from text.symbols import symbols
|
15 |
+
from text import text_to_sequence
|
16 |
+
|
17 |
+
# 检查 PyTorch 版本
|
18 |
+
print(torch.__version__)
|
19 |
+
|
20 |
+
# 检查 CUDA 是否可用
|
21 |
+
print(torch.cuda.is_available())
|
22 |
+
|
23 |
+
# 检查当前 CUDA 版本
|
24 |
+
print(torch.version.cuda)
|
25 |
+
|
26 |
+
# FastAPI 应用
|
27 |
+
app = FastAPI()
|
28 |
+
|
29 |
+
# 请求体模型
|
30 |
+
class TextRequest(BaseModel):
|
31 |
+
text: str
|
32 |
+
|
33 |
+
# 加载配置和模型
|
34 |
+
config_path = "configs/steins_gate_base.json"
|
35 |
+
checkpoint_path = "G_265000.pth"
|
36 |
+
hps = utils.get_hparams_from_file(config_path)
|
37 |
+
net_g = SynthesizerTrn(
|
38 |
+
len(symbols),
|
39 |
+
hps.data.filter_length // 2 + 1,
|
40 |
+
hps.train.segment_size // hps.data.hop_length,
|
41 |
+
**hps.model,
|
42 |
+
).eval()
|
43 |
+
utils.load_checkpoint(checkpoint_path, net_g, None)
|
44 |
+
|
45 |
+
# 文本到语音合成
|
46 |
+
def text_to_speech(content):
    """Synthesize speech for ``content`` using the module-level model.

    Args:
        content: Text to synthesize.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``sampling_rate`` is read
        from ``hps.data.sampling_rate`` and ``audio`` is the float NumPy
        array taken from the first output of ``net_g.infer``.
    """
    token_ids = text_to_sequence(content, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Insert a 0 between tokens when the config asks for it.
        token_ids = commons.intersperse(token_ids, 0)
    tokens = torch.LongTensor(token_ids)

    with torch.no_grad():
        batch = tokens.unsqueeze(0)
        batch_lengths = torch.LongTensor([tokens.size(0)])
        outputs = net_g.infer(
            batch,
            batch_lengths,
            noise_scale=0.667,
            noise_scale_w=0.8,
            length_scale=1,
        )
        audio = outputs[0][0, 0].data.float().numpy()

    return hps.data.sampling_rate, audio
|
57 |
+
|
58 |
+
# API 路由:文本转语音
|
59 |
+
@app.post("/synthesize")
def synthesize(request: TextRequest):
    """Synthesize speech for the request text and return it as Base64 WAV.

    Args:
        request: Request body carrying the text to synthesize.

    Returns:
        A dict ``{"audio": ...}`` whose value is the Base64 text of a WAV
        file containing 16-bit integer samples.
    """
    sampling_rate, audio = text_to_speech(request.text)

    # The scaling by 32767 assumes samples lie in [-1, 1]. A float -> int16
    # cast does not saturate, so clamp first to keep any overshoot from
    # turning into corrupted samples.
    pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Write the WAV file into an in-memory buffer.
    wav_bytes = io.BytesIO()
    write(wav_bytes, sampling_rate, pcm)

    # getvalue() returns the whole buffer regardless of the stream position.
    audio_base64 = base64.b64encode(wav_bytes.getvalue()).decode("utf-8")
    return {"audio": audio_base64}
|
72 |
+
|
73 |
+
# 主函数
|
74 |
+
if __name__ == "__main__":
|
75 |
+
import uvicorn
|
76 |
+
uvicorn.run(app, host="127.0.0.1", port=8000)
|
api_example.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
import base64
|
4 |
+
import io
|
5 |
+
import sounddevice as sd
|
6 |
+
import soundfile as sf
|
7 |
+
|
8 |
+
# API 地址
|
9 |
+
# API endpoint
url = "http://127.0.0.1:8000/synthesize"

# Request body
payload = {"text": "Hello, world!"}

# Request headers
headers = {"Content-Type": "application/json"}

# Send the POST request, then decode and play the returned audio.
try:
    print("Sending request to the API...")
    request_body = json.dumps(payload)
    response = requests.post(url, data=request_body, headers=headers)
    response.raise_for_status()  # raise on an HTTP error status
    result = response.json()
    print("Response received.")

    # Check that the response carries audio data.
    if "audio" not in result:
        print("Unexpected response format. No audio data found.")
    else:
        print("Audio data received as Base64.")

        # Decode the Base64 audio data.
        audio_base64 = result["audio"]
        audio_bytes = base64.b64decode(audio_base64)

        # Read the audio straight from memory with soundfile.
        try:
            wav_buffer = io.BytesIO(audio_bytes)
            audio_data, samplerate = sf.read(wav_buffer)
            print(f"Playing audio (sample rate: {samplerate} Hz)...")
            sd.play(audio_data, samplerate)
            sd.wait()  # block until playback has finished
            print("Audio playback finished.")
        except sf.LibsndfileError as e:
            print(f"Error reading audio data: {e}")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {e}")
except Exception as e:
    print(f"An error occurred: {e}")
|
app.py
CHANGED
@@ -1,25 +1,11 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
import os
|
6 |
-
os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
|
7 |
-
import gradio as gr
|
8 |
-
|
9 |
-
import matplotlib.pyplot as plt
|
10 |
-
import IPython.display as ipd
|
11 |
-
|
12 |
import os
|
13 |
-
import
|
14 |
-
import math
|
15 |
-
import torch
|
16 |
-
from torch import nn
|
17 |
-
from torch.nn import functional as F
|
18 |
-
from torch.utils.data import DataLoader
|
19 |
import logging
|
|
|
|
|
|
|
20 |
|
21 |
-
|
22 |
-
numba_logger.setLevel(logging.WARNING)
|
23 |
import commons
|
24 |
import utils
|
25 |
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
|
@@ -27,44 +13,111 @@ from models import SynthesizerTrn
|
|
27 |
from text.symbols import symbols
|
28 |
from text import text_to_sequence
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import subprocess
|
|
|
|
|
|
|
|
|
|
|
3 |
import logging
|
4 |
+
import torch
|
5 |
+
import gradio as gr
|
6 |
+
from scipy.io.wavfile import write
|
7 |
|
8 |
+
# 自定义模块
|
|
|
9 |
import commons
|
10 |
import utils
|
11 |
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
|
|
|
13 |
from text.symbols import symbols
|
14 |
from text import text_to_sequence
|
15 |
|
16 |
+
# 配置日志
|
17 |
+
logging.basicConfig(level=logging.INFO)
|
18 |
+
logger = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
# 编译 monotonic_align 模块
|
21 |
+
def compile_monotonic_align():
    """Build the ``monotonic_align`` extension in place.

    Runs ``setup.py build_ext --inplace`` with the ``monotonic_align``
    directory as the working directory.

    Raises:
        RuntimeError: If the build process cannot be started or exits with
            a non-zero status.
    """
    import sys  # local import: only needed to locate the running interpreter

    try:
        # check=True raises CalledProcessError on a non-zero exit status.
        # os.system only returns the status, so a failed build used to be
        # logged as a success and the except clause below was unreachable.
        subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            cwd="monotonic_align",
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        logger.error(f"Failed to compile monotonic_align: {e}")
        raise RuntimeError("Compilation of monotonic_align failed.") from e

    logger.info("Successfully compiled monotonic_align.")
|
30 |
+
|
31 |
+
# 加载配置和模型
|
32 |
+
def load_config_and_model(config_path, checkpoint_path):
    """Load hyperparameters and build the synthesizer with its weights.

    Args:
        config_path: Path of the hyperparameter config file.
        checkpoint_path: Path of the model checkpoint file.

    Returns:
        A ``(hps, net_g)`` tuple: the loaded hyperparameters and the
        ``SynthesizerTrn`` model in eval mode with the checkpoint loaded.

    Raises:
        FileNotFoundError: If either path does not exist.
    """
    # Fail early, config first, before any loading is attempted.
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config file not found: {config_path}")
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(f"Checkpoint file not found: {checkpoint_path}")

    # Hyperparameters
    hparams = utils.get_hparams_from_file(config_path)
    logger.info("Loaded hyperparameters from config file.")

    # Model construction
    n_symbols = len(symbols)
    filter_bins = hparams.data.filter_length // 2 + 1
    segment_steps = hparams.train.segment_size // hparams.data.hop_length
    model = SynthesizerTrn(n_symbols, filter_bins, segment_steps, **hparams.model)
    model.eval()
    logger.info("Initialized SynthesizerTrn model.")

    # Pretrained weights (no optimizer state is restored)
    utils.load_checkpoint(checkpoint_path, model, None)
    logger.info(f"Loaded model checkpoint from {checkpoint_path}.")

    return hparams, model
|
57 |
+
|
58 |
+
# 文本到语音合成
|
59 |
+
def text_to_speech(content, hps, net_g):
    """Synthesize speech for ``content`` with the given model.

    Args:
        content: Non-empty text to synthesize.
        hps: Hyperparameters; ``hps.data`` supplies the text cleaners, the
            ``add_blank`` flag and the sampling rate.
        net_g: Model exposing ``infer``.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``audio`` is a float
        NumPy array.

    Raises:
        ValueError: If ``content`` is empty or not a string.
        RuntimeError: If any step of synthesis fails; the original error is
            attached as ``__cause__``.
    """
    if not content or not isinstance(content, str):
        raise ValueError("Input text is empty or invalid.")

    try:
        # Convert the text into a sequence of symbol ids.
        stn_tst = text_to_sequence(content, hps.data.text_cleaners)
        if hps.data.add_blank:
            stn_tst = commons.intersperse(stn_tst, 0)
        stn_tst = torch.LongTensor(stn_tst)

        # Model inference
        with torch.no_grad():
            x_tst = stn_tst.unsqueeze(0)
            x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
            audio = net_g.infer(
                x_tst, x_tst_lengths, noise_scale=0.667, noise_scale_w=0.8, length_scale=1
            )[0][0, 0].data.float().numpy()

        return hps.data.sampling_rate, audio
    except Exception as e:
        # logger.exception records the traceback; "from e" keeps the root
        # cause attached to the RuntimeError that callers see.
        logger.exception("Error during text-to-speech synthesis: %s", e)
        raise RuntimeError("Failed to generate audio.") from e
|
82 |
+
|
83 |
+
# Gradio 界面
|
84 |
+
def create_gradio_interface(hps, net_g):
    """Build the Gradio UI that converts typed text to audio.

    Args:
        hps: Hyperparameters forwarded to ``text_to_speech``.
        net_g: Model forwarded to ``text_to_speech``.

    Returns:
        The ``gr.Blocks`` app holding a single "Basic" tab with a text
        box, a convert button and an audio output.
    """
    def safe_syn(content):
        # Any synthesis failure is logged and reported to the UI as None.
        try:
            result = text_to_speech(content, hps, net_g)
        except Exception as e:
            logger.error(f"Error in Gradio interface: {e}")
            result = None
        return result

    with gr.Blocks() as app:
        with gr.Tabs(), gr.TabItem("Basic"):
            text_box = gr.Textbox(label="Input Text", placeholder="Enter text here...")
            convert_button = gr.Button("Convert", variant="primary")
            audio_out = gr.Audio(label="Output Audio")
            convert_button.click(safe_syn, text_box, audio_out)

    return app
|
102 |
+
|
103 |
+
# 主函数
|
104 |
+
def main():
    """Compile the extension, load the model and launch the Gradio app.

    Any exception raised along the way is logged at CRITICAL level and the
    process exits with status 1.
    """
    # sys.exit is always available; the bare exit() helper is injected by
    # the site module and is missing under `python -S` or in frozen builds.
    import sys

    try:
        # Compile monotonic_align
        compile_monotonic_align()

        # Load the config and the model
        config_path = "configs/steins_gate_base.json"
        checkpoint_path = "G_265000.pth"
        hps, net_g = load_config_and_model(config_path, checkpoint_path)

        # Create the Gradio interface
        app = create_gradio_interface(hps, net_g)
        logger.info("Starting Gradio interface...")
        app.launch()
    except Exception as e:
        logger.critical(f"Fatal error: {e}")
        sys.exit(1)
|
121 |
+
|
122 |
+
if __name__ == "__main__":
|
123 |
+
main()
|
monotonic_align/.gitignore
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
.vscode/
|
monotonic_align/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 Moonsik Park
|
4 |
+
Copyright (c) 2021 Jaehyeon Kim
|
5 |
+
|
6 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 |
+
of this software and associated documentation files (the "Software"), to deal
|
8 |
+
in the Software without restriction, including without limitation the rights
|
9 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10 |
+
copies of the Software, and to permit persons to whom the Software is
|
11 |
+
furnished to do so, subject to the following conditions:
|
12 |
+
|
13 |
+
The above copyright notice and this permission notice shall be included in all
|
14 |
+
copies or substantial portions of the Software.
|
15 |
+
|
16 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22 |
+
SOFTWARE.
|
monotonic_align/PKG-INFO
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: monotonic_align
|
3 |
+
Version: 1.0.0
|
4 |
+
Summary: Monotonic Alignment Search module
|
5 |
+
Author-email: Moonsik Park <[email protected]>
|
6 |
+
Project-URL: repository, https://github.com/moonsikpark/monotonic_align
|
7 |
+
Classifier: Development Status :: 5 - Production/Stable
|
8 |
+
Classifier: License :: OSI Approved :: MIT License
|
9 |
+
Classifier: Operating System :: OS Independent
|
10 |
+
Classifier: Programming Language :: Python :: 3
|
11 |
+
Classifier: Programming Language :: Cython
|
12 |
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
13 |
+
Requires-Python: >=3.7
|
14 |
+
Description-Content-Type: text/markdown
|
15 |
+
License-File: LICENSE.md
|
16 |
+
|
17 |
+
# Monotonic Alignment Search
|
18 |
+
|
19 |
+
This module does Monotonic Alignment Search required by the [Vits](https://github.com/jaywalnut310/vits) TTS model, and has been copied from [this source](https://github.com/jaywalnut310/vits/tree/2b91ceff252082644bd507d13476a49ea260cadf/monotonic_align).
|
20 |
+
|
21 |
+
The module has been packaged properly as per [PEP 621](https://peps.python.org/pep-0621/), and has a binary distribution compiled for nearly every UNIX environment.
|
22 |
+
|
23 |
+
## License
|
24 |
+
|
25 |
+
Please note that the author(s) are not liable for any claim, damages or other liability thereof.
|
26 |
+
|
27 |
+
```
|
28 |
+
MIT License
|
29 |
+
|
30 |
+
Copyright (c) 2023 Moonsik Park
|
31 |
+
Copyright (c) 2021 Jaehyeon Kim
|
32 |
+
````
|
monotonic_align/README.md
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Monotonic Alignment Search
|
2 |
+
|
3 |
+
This module does Monotonic Alignment Search required by the [Vits](https://github.com/jaywalnut310/vits) TTS model, and has been copied from [this source](https://github.com/jaywalnut310/vits/tree/2b91ceff252082644bd507d13476a49ea260cadf/monotonic_align).
|
4 |
+
|
5 |
+
The module has been packaged properly as per [PEP 621](https://peps.python.org/pep-0621/), and has a binary distribution compiled for nearly every UNIX environment.
|
6 |
+
|
7 |
+
## License
|
8 |
+
|
9 |
+
Please note that the author(s) are not liable for any claim, damages or other liability thereof.
|
10 |
+
|
11 |
+
```
|
12 |
+
MIT License
|
13 |
+
|
14 |
+
Copyright (c) 2023 Moonsik Park
|
15 |
+
Copyright (c) 2021 Jaehyeon Kim
|
16 |
+
````
|
monotonic_align/__init__.py
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import torch
|
3 |
-
from .monotonic_align.core import maximum_path_c
|
4 |
-
|
5 |
-
|
6 |
-
def maximum_path(neg_cent, mask):
    """Cython optimized version.

    neg_cent: [b, t_t, t_s]
    mask: [b, t_t, t_s]

    Returns the path filled in by ``maximum_path_c`` as a tensor on the
    same device and with the same dtype as ``neg_cent``.
    """
    target_device, target_dtype = neg_cent.device, neg_cent.dtype

    # The compiled routine works on CPU NumPy buffers: float32 values in,
    # int32 path out (written in place).
    values = neg_cent.data.cpu().numpy().astype(np.float32)
    path = np.zeros(values.shape, dtype=np.int32)

    # Per-item extents taken from the mask sums along each axis.
    t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(np.int32)
    t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(np.int32)

    maximum_path_c(path, values, t_t_max, t_s_max)
    return torch.from_numpy(path).to(device=target_device, dtype=target_dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
monotonic_align/__pycache__/__init__.cpython-39.pyc
DELETED
Binary file (804 Bytes)
|
|
monotonic_align/build/temp.win-amd64-3.9/Release/core.cp39-win_amd64.exp
DELETED
Binary file (746 Bytes)
|
|
monotonic_align/build/temp.win-amd64-3.9/Release/core.cp39-win_amd64.lib
DELETED
Binary file (1.94 kB)
|
|
monotonic_align/build/temp.win-amd64-3.9/Release/core.obj
DELETED
Binary file (720 kB)
|
|
monotonic_align/monotonic_align/core.cp39-win_amd64.pyd
DELETED
Binary file (151 kB)
|
|
monotonic_align/pyproject.toml
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["setuptools==67.4.0", "setuptools-scm[toml]==7.1.0", "Cython==0.29.33"]
|
3 |
+
build-backend = "setuptools.build_meta"
|
4 |
+
|
5 |
+
[project]
|
6 |
+
name = "monotonic_align"
|
7 |
+
authors = [
|
8 |
+
{name = "Moonsik Park", email = "[email protected]"},
|
9 |
+
]
|
10 |
+
description = "Monotonic Alignment Search module"
|
11 |
+
readme = "README.md"
|
12 |
+
requires-python = ">=3.7"
|
13 |
+
classifiers = [
|
14 |
+
"Development Status :: 5 - Production/Stable",
|
15 |
+
"License :: OSI Approved :: MIT License",
|
16 |
+
"Operating System :: OS Independent",
|
17 |
+
"Programming Language :: Python :: 3",
|
18 |
+
"Programming Language :: Cython",
|
19 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
20 |
+
]
|
21 |
+
dynamic = ["version"]
|
22 |
+
|
23 |
+
|
24 |
+
[project.urls]
|
25 |
+
repository = "https://github.com/moonsikpark/monotonic_align"
|
26 |
+
|
27 |
+
[tool.setuptools]
|
28 |
+
include-package-data = false
|
29 |
+
|
30 |
+
[tool.setuptools_scm]
|
31 |
+
|
32 |
+
[tool.setuptools.packages.find]
|
33 |
+
where = ["src"]
|
monotonic_align/setup.cfg
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[egg_info]
|
2 |
+
tag_build =
|
3 |
+
tag_date = 0
|
4 |
+
|
monotonic_align/setup.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
-
from
|
2 |
-
from Cython.Build import cythonize
|
3 |
-
import numpy
|
4 |
|
5 |
setup(
|
6 |
-
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
9 |
)
|
|
|
1 |
+
from setuptools import Extension, setup

# Single extension module built from the Cython source under src/.
core_extension = Extension(
    name="monotonic_align.core",
    sources=["src/core.pyx"],
)

setup(ext_modules=[core_extension])
|
monotonic_align/{core.c → src/core.c}
RENAMED
The diff for this file is too large to render.
See raw diff
|
|
monotonic_align/{core.pyx → src/core.pyx}
RENAMED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
cimport cython
|
2 |
from cython.parallel import prange
|
3 |
|
|
|
1 |
+
#cython: language_level=3
|
2 |
cimport cython
|
3 |
from cython.parallel import prange
|
4 |
|
requirements.txt
CHANGED
@@ -1,16 +1,20 @@
|
|
1 |
-
Cython
|
2 |
-
librosa
|
3 |
-
matplotlib
|
4 |
-
numpy
|
5 |
-
phonemizer
|
6 |
-
scipy
|
7 |
-
tensorboard
|
8 |
torch
|
9 |
torchvision
|
10 |
-
Unidecode
|
11 |
-
jamo
|
12 |
-
pypinyin
|
13 |
-
jieba
|
14 |
-
cn2an
|
15 |
IPython
|
16 |
-
pyopenjtalk
|
|
|
|
|
|
|
|
|
|
1 |
+
Cython
|
2 |
+
librosa
|
3 |
+
matplotlib
|
4 |
+
numpy
|
5 |
+
phonemizer
|
6 |
+
scipy
|
7 |
+
tensorboard
|
8 |
torch
|
9 |
torchvision
|
10 |
+
Unidecode
|
11 |
+
jamo
|
12 |
+
pypinyin
|
13 |
+
jieba
|
14 |
+
cn2an
|
15 |
IPython
|
16 |
+
pyopenjtalk
|
17 |
+
|
18 |
+
Pillow
|
19 |
+
Cython
|
20 |
+
gradio
|
utils.py
CHANGED
@@ -18,6 +18,8 @@ logger = logging
|
|
18 |
def load_checkpoint(checkpoint_path, model, optimizer=None):
|
19 |
assert os.path.isfile(checkpoint_path)
|
20 |
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
|
|
|
|
|
21 |
iteration = checkpoint_dict['iteration']
|
22 |
learning_rate = checkpoint_dict['learning_rate']
|
23 |
if optimizer is not None:
|
@@ -85,7 +87,7 @@ def plot_spectrogram_to_numpy(spectrogram):
|
|
85 |
mpl_logger.setLevel(logging.WARNING)
|
86 |
import matplotlib.pylab as plt
|
87 |
import numpy as np
|
88 |
-
|
89 |
fig, ax = plt.subplots(figsize=(10,2))
|
90 |
im = ax.imshow(spectrogram, aspect="auto", origin="lower",
|
91 |
interpolation='none')
|
@@ -147,7 +149,7 @@ def get_hparams(init=True):
|
|
147 |
help='JSON file for configuration')
|
148 |
parser.add_argument('-m', '--model', type=str, required=True,
|
149 |
help='Model name')
|
150 |
-
|
151 |
args = parser.parse_args()
|
152 |
model_dir = os.path.join("../drive/MyDrive", args.model)
|
153 |
|
@@ -165,7 +167,7 @@ def get_hparams(init=True):
|
|
165 |
with open(config_save_path, "r") as f:
|
166 |
data = f.read()
|
167 |
config = json.loads(data)
|
168 |
-
|
169 |
hparams = HParams(**config)
|
170 |
hparams.model_dir = model_dir
|
171 |
return hparams
|
@@ -215,7 +217,7 @@ def get_logger(model_dir, filename="train.log"):
|
|
215 |
global logger
|
216 |
logger = logging.getLogger(os.path.basename(model_dir))
|
217 |
logger.setLevel(logging.DEBUG)
|
218 |
-
|
219 |
formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
|
220 |
if not os.path.exists(model_dir):
|
221 |
os.makedirs(model_dir)
|
@@ -232,7 +234,7 @@ class HParams():
|
|
232 |
if type(v) == dict:
|
233 |
v = HParams(**v)
|
234 |
self[k] = v
|
235 |
-
|
236 |
def keys(self):
|
237 |
return self.__dict__.keys()
|
238 |
|
|
|
18 |
def load_checkpoint(checkpoint_path, model, optimizer=None):
|
19 |
assert os.path.isfile(checkpoint_path)
|
20 |
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
|
21 |
+
#checkpoint_dict = torch.load(checkpoint_path, map_location='cuda:0', weights_only=True)
|
22 |
+
|
23 |
iteration = checkpoint_dict['iteration']
|
24 |
learning_rate = checkpoint_dict['learning_rate']
|
25 |
if optimizer is not None:
|
|
|
87 |
mpl_logger.setLevel(logging.WARNING)
|
88 |
import matplotlib.pylab as plt
|
89 |
import numpy as np
|
90 |
+
|
91 |
fig, ax = plt.subplots(figsize=(10,2))
|
92 |
im = ax.imshow(spectrogram, aspect="auto", origin="lower",
|
93 |
interpolation='none')
|
|
|
149 |
help='JSON file for configuration')
|
150 |
parser.add_argument('-m', '--model', type=str, required=True,
|
151 |
help='Model name')
|
152 |
+
|
153 |
args = parser.parse_args()
|
154 |
model_dir = os.path.join("../drive/MyDrive", args.model)
|
155 |
|
|
|
167 |
with open(config_save_path, "r") as f:
|
168 |
data = f.read()
|
169 |
config = json.loads(data)
|
170 |
+
|
171 |
hparams = HParams(**config)
|
172 |
hparams.model_dir = model_dir
|
173 |
return hparams
|
|
|
217 |
global logger
|
218 |
logger = logging.getLogger(os.path.basename(model_dir))
|
219 |
logger.setLevel(logging.DEBUG)
|
220 |
+
|
221 |
formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s")
|
222 |
if not os.path.exists(model_dir):
|
223 |
os.makedirs(model_dir)
|
|
|
234 |
if type(v) == dict:
|
235 |
v = HParams(**v)
|
236 |
self[k] = v
|
237 |
+
|
238 |
def keys(self):
|
239 |
return self.__dict__.keys()
|
240 |
|