Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,8 +7,9 @@ from pydub import AudioSegment
|
|
7 |
import tempfile
|
8 |
from scipy.io.wavfile import write, read
|
9 |
from TTS.api import TTS
|
|
|
10 |
|
11 |
-
#
|
12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
13 |
|
14 |
# Глобальные переменные и настройки
|
@@ -59,49 +60,63 @@ def check_audio_length(audio_path, max_duration=120):
|
|
59 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
|
60 |
tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
|
61 |
wav_data = tts_synthesis.tts(text, speed=speed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
63 |
|
64 |
-
#
|
65 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_wav_file:
|
66 |
temp_tts_wav_path = temp_tts_wav_file.name
|
67 |
-
write(temp_tts_wav_path, 22050,
|
68 |
|
69 |
-
#
|
70 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
71 |
temp_output_wav_path = temp_output_wav_file.name
|
72 |
|
|
|
73 |
tts_conversion.voice_conversion_to_file(temp_tts_wav_path, target_wav=voice_audio_path,
|
74 |
file_path=temp_output_wav_path)
|
75 |
|
76 |
-
#
|
77 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
78 |
|
79 |
-
#
|
80 |
os.remove(temp_tts_wav_path)
|
81 |
os.remove(temp_output_wav_path)
|
82 |
|
83 |
return (output_sample_rate, output_audio_data)
|
84 |
|
85 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
86 |
-
#
|
87 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
|
88 |
temp_tts_output_path = temp_tts_output.name
|
89 |
tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
|
90 |
-
|
91 |
|
92 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
93 |
|
94 |
-
#
|
95 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
96 |
temp_output_wav_path = temp_output_wav_file.name
|
97 |
|
|
|
98 |
tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
|
99 |
-
|
100 |
|
101 |
-
#
|
102 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
103 |
|
104 |
-
#
|
105 |
os.remove(temp_tts_output_path)
|
106 |
os.remove(temp_output_wav_path)
|
107 |
|
@@ -126,7 +141,7 @@ def process_speech(text, speaker_wav_path, selected_language, speed):
|
|
126 |
error = gr.Error(error_message, duration=5)
|
127 |
raise error
|
128 |
|
129 |
-
#
|
130 |
audio = AudioSegment.from_file(speaker_wav_path)
|
131 |
duration = audio.duration_seconds
|
132 |
if duration > 120:
|
@@ -273,7 +288,7 @@ with gr.Blocks() as app:
|
|
273 |
|
274 |
def launch_gradio():
|
275 |
app.launch(
|
276 |
-
|
277 |
)
|
278 |
|
279 |
if __name__ == "__main__":
|
|
|
7 |
import tempfile
|
8 |
from scipy.io.wavfile import write, read
|
9 |
from TTS.api import TTS
|
10 |
+
import numpy as np # Добавлен импорт NumPy
|
11 |
|
12 |
+
# Установка переменных окружения для принятия лицензионных условий
|
13 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
14 |
|
15 |
# Глобальные переменные и настройки
|
|
|
60 |
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
    """Synthesize ``text`` and convert the result to the voice in ``voice_audio_path``.

    Speech is generated with the ``tts_models/{language_iso}/fairseq/vits``
    model, written to a temporary WAV file as 16-bit PCM, and then passed
    through the FreeVC voice-conversion model using ``voice_audio_path`` as
    the target voice.

    Args:
        text: Text to synthesize.
        language_iso: Language code interpolated into the TTS model name.
        voice_audio_path: Path to the audio file used as ``target_wav`` for
            voice conversion.
        speed: Value forwarded as ``speed`` to the synthesis call.

    Returns:
        A ``(sample_rate, audio_data)`` tuple as returned by
        ``scipy.io.wavfile.read`` for the converted audio.
    """
    tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
    wav_data = tts_synthesis.tts(text, speed=speed)

    # Convert the synthesized samples (presumably a list of floats) into a
    # float32 NumPy array.
    wav_data_np = np.array(wav_data, dtype=np.float32)

    # Scale down only when the peak exceeds 1.0, so that the int16 conversion
    # below cannot overflow.
    max_val = np.max(np.abs(wav_data_np))
    if max_val > 1.0:
        wav_data_np = wav_data_np / max_val

    # Scale to the int16 range for writing as a PCM WAV file.
    wav_data_int16 = (wav_data_np * 32767).astype(np.int16)

    tts_conversion = TTS(
        model_name="voice_conversion_models/multilingual/vctk/freevc24",
        progress_bar=False,
    )

    # Keep both intermediate files inside one temporary directory so they are
    # removed on every exit path, including when synthesis output cannot be
    # written or voice conversion raises.
    with tempfile.TemporaryDirectory() as work_dir:
        temp_tts_wav_path = os.path.join(work_dir, "tts_input.wav")
        temp_output_wav_path = os.path.join(work_dir, "converted_output.wav")

        # NOTE(review): the sample rate is hard-coded to 22050 Hz; confirm it
        # matches the output rate of the selected synthesis model.
        write(temp_tts_wav_path, 22050, wav_data_int16)

        # Voice conversion: source is the synthesized speech, target is the
        # caller-supplied voice sample.
        tts_conversion.voice_conversion_to_file(
            temp_tts_wav_path,
            target_wav=voice_audio_path,
            file_path=temp_output_wav_path,
        )

        # Load the converted audio into memory before the directory is removed.
        output_sample_rate, output_audio_data = read(temp_output_wav_path)

    return (output_sample_rate, output_audio_data)
|
98 |
|
99 |
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
|
100 |
+
# Генерация речи с помощью tts и сохранение во временный файл
|
101 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
|
102 |
temp_tts_output_path = temp_tts_output.name
|
103 |
tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
|
104 |
+
speaker_wav=speaker_wav_path, language=language_iso)
|
105 |
|
106 |
tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
|
107 |
|
108 |
+
# Подготовка временного выходного файла
|
109 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
|
110 |
temp_output_wav_path = temp_output_wav_file.name
|
111 |
|
112 |
+
# Преобразование голоса
|
113 |
tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
|
114 |
+
file_path=temp_output_wav_path)
|
115 |
|
116 |
+
# Чтение преобразованного аудио из temp_output_wav_path
|
117 |
output_sample_rate, output_audio_data = read(temp_output_wav_path)
|
118 |
|
119 |
+
# Удаление временных файлов
|
120 |
os.remove(temp_tts_output_path)
|
121 |
os.remove(temp_output_wav_path)
|
122 |
|
|
|
141 |
error = gr.Error(error_message, duration=5)
|
142 |
raise error
|
143 |
|
144 |
+
# Проверка длины аудио
|
145 |
audio = AudioSegment.from_file(speaker_wav_path)
|
146 |
duration = audio.duration_seconds
|
147 |
if duration > 120:
|
|
|
288 |
|
289 |
def launch_gradio():
    """Launch the Gradio ``app`` using its default launch settings."""
    # Launch parameters can be added to this call if needed.
    app.launch()
|
293 |
|
294 |
if __name__ == "__main__":
|