Uniaff committed on
Commit
77e9d69
·
verified ·
1 Parent(s): 473beda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -14
app.py CHANGED
@@ -7,8 +7,9 @@ from pydub import AudioSegment
7
  import tempfile
8
  from scipy.io.wavfile import write, read
9
  from TTS.api import TTS
 
10
 
11
- # # Set environment variables to accept license terms
12
  os.environ["COQUI_TOS_AGREED"] = "1"
13
 
14
  # Глобальные переменные и настройки
@@ -59,49 +60,63 @@ def check_audio_length(audio_path, max_duration=120):
59
  def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
60
  tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
61
  wav_data = tts_synthesis.tts(text, speed=speed)
 
 
 
 
 
 
 
 
 
 
 
 
62
  tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
63
 
64
- # Write wav_data to temporary file
65
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_wav_file:
66
  temp_tts_wav_path = temp_tts_wav_file.name
67
- write(temp_tts_wav_path, 22050, wav_data)
68
 
69
- # Prepare output temporary file
70
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
71
  temp_output_wav_path = temp_output_wav_file.name
72
 
 
73
  tts_conversion.voice_conversion_to_file(temp_tts_wav_path, target_wav=voice_audio_path,
74
  file_path=temp_output_wav_path)
75
 
76
- # Read converted audio from temp_output_wav_path
77
  output_sample_rate, output_audio_data = read(temp_output_wav_path)
78
 
79
- # Remove temporary files
80
  os.remove(temp_tts_wav_path)
81
  os.remove(temp_output_wav_path)
82
 
83
  return (output_sample_rate, output_audio_data)
84
 
85
  def synthesize_speech(text, speaker_wav_path, language_iso, speed):
86
- # Generate speech using tts and save to temporary file
87
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
88
  temp_tts_output_path = temp_tts_output.name
89
  tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
90
- speaker_wav=speaker_wav_path, language=language_iso)
91
 
92
  tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
93
 
94
- # Prepare output temporary file
95
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
96
  temp_output_wav_path = temp_output_wav_file.name
97
 
 
98
  tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
99
- file_path=temp_output_wav_path)
100
 
101
- # Read converted audio from temp_output_wav_path
102
  output_sample_rate, output_audio_data = read(temp_output_wav_path)
103
 
104
- # Remove temporary files
105
  os.remove(temp_tts_output_path)
106
  os.remove(temp_output_wav_path)
107
 
@@ -126,7 +141,7 @@ def process_speech(text, speaker_wav_path, selected_language, speed):
126
  error = gr.Error(error_message, duration=5)
127
  raise error
128
 
129
- # Check audio length
130
  audio = AudioSegment.from_file(speaker_wav_path)
131
  duration = audio.duration_seconds
132
  if duration > 120:
@@ -273,7 +288,7 @@ with gr.Blocks() as app:
273
 
274
  def launch_gradio():
275
  app.launch(
276
-
277
  )
278
 
279
  if __name__ == "__main__":
 
7
  import tempfile
8
  from scipy.io.wavfile import write, read
9
  from TTS.api import TTS
10
+ import numpy as np # Добавлен импорт NumPy
11
 
12
+ # Установка переменных окружения для принятия лицензионных условий
13
  os.environ["COQUI_TOS_AGREED"] = "1"
14
 
15
  # Глобальные переменные и настройки
 
60
  def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
61
  tts_synthesis = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
62
  wav_data = tts_synthesis.tts(text, speed=speed)
63
+
64
+ # Преобразование wav_data из списка в NumPy массив с типом float32
65
+ wav_data_np = np.array(wav_data, dtype=np.float32)
66
+
67
+ # Нормализация данных, если необходимо
68
+ max_val = np.max(np.abs(wav_data_np))
69
+ if max_val > 1.0:
70
+ wav_data_np = wav_data_np / max_val
71
+
72
+ # Масштабирование до int16 для записи в WAV файл
73
+ wav_data_int16 = np.int16(wav_data_np * 32767)
74
+
75
  tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
76
 
77
+ # Запись wav_data_int16 во временный файл
78
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_wav_file:
79
  temp_tts_wav_path = temp_tts_wav_file.name
80
+ write(temp_tts_wav_path, 22050, wav_data_int16) # Используем массив int16
81
 
82
+ # Подготовка временного выходного файла
83
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
84
  temp_output_wav_path = temp_output_wav_file.name
85
 
86
+ # Преобразование голоса
87
  tts_conversion.voice_conversion_to_file(temp_tts_wav_path, target_wav=voice_audio_path,
88
  file_path=temp_output_wav_path)
89
 
90
+ # Чтение преобразованного аудио из temp_output_wav_path
91
  output_sample_rate, output_audio_data = read(temp_output_wav_path)
92
 
93
+ # Удаление временных файлов
94
  os.remove(temp_tts_wav_path)
95
  os.remove(temp_output_wav_path)
96
 
97
  return (output_sample_rate, output_audio_data)
98
 
99
  def synthesize_speech(text, speaker_wav_path, language_iso, speed):
100
+ # Генерация речи с помощью tts и сохранение во временный файл
101
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_tts_output:
102
  temp_tts_output_path = temp_tts_output.name
103
  tts.tts_to_file(text=text, file_path=temp_tts_output_path, speed=speed,
104
+ speaker_wav=speaker_wav_path, language=language_iso)
105
 
106
  tts_conversion = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False)
107
 
108
+ # Подготовка временного выходного файла
109
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_output_wav_file:
110
  temp_output_wav_path = temp_output_wav_file.name
111
 
112
+ # Преобразование голоса
113
  tts_conversion.voice_conversion_to_file(temp_tts_output_path, target_wav=speaker_wav_path,
114
+ file_path=temp_output_wav_path)
115
 
116
+ # Чтение преобразованного аудио из temp_output_wav_path
117
  output_sample_rate, output_audio_data = read(temp_output_wav_path)
118
 
119
+ # Удаление временных файлов
120
  os.remove(temp_tts_output_path)
121
  os.remove(temp_output_wav_path)
122
 
 
141
  error = gr.Error(error_message, duration=5)
142
  raise error
143
 
144
+ # Проверка длины аудио
145
  audio = AudioSegment.from_file(speaker_wav_path)
146
  duration = audio.duration_seconds
147
  if duration > 120:
 
288
 
289
  def launch_gradio():
290
  app.launch(
291
+ # Вы можете добавить параметры запуска здесь, если необходимо
292
  )
293
 
294
  if __name__ == "__main__":