Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,85 +1,55 @@
|
|
1 |
-
import
|
2 |
import numpy as np
|
3 |
-
import soundfile as sf
|
4 |
import gradio as gr
|
5 |
-
from
|
6 |
-
from
|
7 |
-
|
8 |
-
from vocoder import inference as vocoder
|
9 |
-
import librosa
|
10 |
-
|
11 |
-
# Device selection: use CUDA when torch reports it as available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Report the chosen device on stdout (the message text is runtime output).
print(f"Используется устройство: {device}")
|
14 |
|
15 |
# Инициализация моделей
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
#
|
27 |
-
|
28 |
-
# Switch the vocoder to the CUDA device only when the module-level `device`
# is "cuda"; on CPU nothing is changed.
if device == "cuda":
    vocoder.set_device(torch.device("cuda"))
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
def clone_voice(reference_audio_path, text, output_sample_rate=22050):
    """Synthesize ``text`` in the voice of a reference recording.

    Steps: preprocess the reference audio, compute a speaker embedding,
    synthesize a spectrogram for ``text``, run the vocoder, post-process the
    waveform and write it to a WAV file.

    Args:
        reference_audio_path: Reference recording, passed straight to
            ``encoder.preprocess_wav``.
        text: Text to be spoken.
        output_sample_rate: Sample rate (Hz) of the written WAV file.

    Returns:
        Path of the generated WAV file (a unique temporary file).

    Raises:
        gr.Error: If any stage fails; the original exception is chained.
    """
    # Local import: the surrounding file does not import tempfile at top level.
    import tempfile

    try:
        # Preprocess the reference audio.
        preprocessed_wav = encoder.preprocess_wav(reference_audio_path)

        # Extract the speaker embedding from the reference utterance.
        embed = encoder.embed_utterance(preprocessed_wav)

        # Synthesize the spectrogram (one text, one embedding -> one result).
        specs = synthesizer.synthesize_spectrograms([text], [embed])
        spec = specs[0]

        # Turn the spectrogram into a waveform with the vocoder.
        generated_wav = vocoder.infer_waveform(spec)

        # Post-processing: append `synthesizer.sample_rate` zero samples, then
        # run the encoder's preprocessing over the result.
        native_rate = synthesizer.sample_rate
        generated_wav = np.pad(generated_wav, (0, native_rate), mode="constant")
        generated_wav = encoder.preprocess_wav(generated_wav)

        # NOTE(review): the waveform is presumably sampled at
        # `synthesizer.sample_rate` (the padding above is sized with it).
        # Writing it under a different rate would change speed and pitch, so
        # resample whenever the requested output rate differs.
        if output_sample_rate != native_rate:
            generated_wav = librosa.resample(
                generated_wav,
                orig_sr=native_rate,
                target_sr=output_sample_rate,
            )

        # Use a unique file per call so concurrent requests do not overwrite
        # each other's output; close the handle before writing by name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name
        sf.write(output_file, generated_wav, output_sample_rate)

        return output_file
    except Exception as e:
        print(f"Ошибка при синтезе: {str(e)}")
        raise gr.Error(f"Ошибка синтеза: {str(e)}") from e
|
62 |
-
|
63 |
-
def gradio_interface(input_audio, input_text):
    """Validate the UI inputs and run voice cloning.

    Args:
        input_audio: Reference audio supplied by the UI; ``None`` when the
            user has not provided any.
        input_text: Text to synthesize; may be ``None`` or blank.

    Returns:
        Whatever ``clone_voice(input_audio, input_text)`` returns.

    Raises:
        gr.Error: If the audio is missing or the text is empty/blank.
    """
    # `input_text or ""` guards against None, which has no .strip().
    if input_audio is None or not (input_text or "").strip():
        raise gr.Error("Загрузите аудио и введите текст")
    return clone_voice(input_audio, input_text)
|
67 |
|
68 |
# Интерфейс Gradio
|
69 |
-
|
70 |
-
gr.
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
if __name__ == "__main__":
|
85 |
-
|
|
|
1 |
+
import os
import tempfile

import gradio as gr
import numpy as np
import torch
from scipy.io import wavfile

from rvc_infer import RVCModel
from tts import TortoiseTTS
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Model initialisation.
# Run the TTS on CUDA when torch reports it as available, otherwise on CPU.
_DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
tts = TortoiseTTS(device=_DEVICE)

# One RVC model per selectable voice. The dict keys are the voice identifiers
# later used as `voice_type`; each model is built from a `.pth` file and a
# `.index` file stored under `<root>/<voice>_voice/`.
_RVC_MODEL_ROOT = 'models/rvc_models'
rvc_models = {
    voice: RVCModel(
        f'{_RVC_MODEL_ROOT}/{voice}_voice/model.pth',
        f'{_RVC_MODEL_ROOT}/{voice}_voice/model.index',
    )
    for voice in ('russian', 'multilingual')
}
|
20 |
+
|
21 |
+
def clone_voice(text, voice_type):
    """Generate speech for ``text`` and convert it with the selected RVC model.

    Args:
        text: Text to synthesize.
        voice_type: Key into ``rvc_models``; also passed to the TTS as
            ``speaker``.

    Returns:
        Path of a temporary ``.wav`` file containing the converted audio.
        The file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        gr.Error: If ``text`` is empty/blank or ``voice_type`` is not a key
            of ``rvc_models``.
    """
    if not text or not text.strip():
        raise gr.Error("Введите текст")

    # Resolve the model first so an unknown voice fails before the TTS runs.
    try:
        rvc_model = rvc_models[voice_type]
    except KeyError:
        raise gr.Error(f"Неизвестный голос: {voice_type}") from None

    # Speech generation with the TTS model; the returned rate is not used.
    wav, _sr = tts.text_to_speech(text, speaker=voice_type)

    # Voice conversion with RVC.
    converted_audio = rvc_model.infer(wav)

    # Reserve a unique temporary path, then close the handle before writing:
    # a file that is still open cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
        output_path = fp.name

    # NOTE(review): the 16000 Hz rate is hard-coded; confirm that it matches
    # the sample rate of the audio returned by `infer`.
    wavfile.write(output_path, 16000, converted_audio)
    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# Gradio interface.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below (inputs inside the row, button and output beneath it) is assumed —
# confirm against the real source.
with gr.Blocks(title="Voice Clone") as app:
    gr.Markdown("# 🎤 Голосовое клонирование RVC v2 + Tortoise TTS")

    with gr.Row():
        # Free-form text to be spoken.
        text_input = gr.Textbox(label="Введите текст", lines=3)
        # Voice choice; the selected value is passed to clone_voice as
        # its second argument.
        voice_selector = gr.Dropdown(
            choices=['russian', 'multilingual'],
            label="Голос",
            value='russian'
        )

    submit_btn = gr.Button("Сгенерировать")
    # type="filepath": the component is fed the path returned by clone_voice.
    audio_output = gr.Audio(label="Результат", type="filepath")

    # On click, call clone_voice with the text and selected voice and send
    # its return value to the audio component.
    submit_btn.click(
        fn=clone_voice,
        inputs=[text_input, voice_selector],
        outputs=audio_output
    )
|
53 |
|
54 |
if __name__ == "__main__":
    # Start the app only when run as a script, listening on all network
    # interfaces (0.0.0.0) on port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860)
|