Uniaff committed on
Commit
486b9c4
·
verified ·
1 Parent(s): cbaf094

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -46
app.py CHANGED
@@ -1,46 +1,141 @@
1
- import gradio as gr
2
- import subprocess
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- def generate(video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video):
6
- if video is None or audio is None or checkpoint is None:
7
- return "Пожалуйста, загрузите видео/изображение и аудио файл, а также выберите чекпойнт."
 
 
 
 
8
 
9
- print(f"Текущая рабочая директория: {os.getcwd()}")
10
- print(f"Содержимое текущей директории: {os.listdir('.')}")
11
- print(f"Проверка наличия 'inference.py': {os.path.exists('inference.py')}")
12
 
13
- video_path = video
14
- audio_path = audio
15
 
16
- print(f"Путь к видео: {video_path}")
17
- print(f"Путь к аудио: {audio_path}")
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
19
  output_dir = "outputs"
20
  os.makedirs(output_dir, exist_ok=True)
21
-
22
  output_path = os.path.join(output_dir, "output.mp4")
23
- print(f"Путь к выходному файлу: {output_path}")
24
 
25
  args = [
26
- "--checkpoint_path", f"checkpoints/{checkpoint}.pth",
27
- "--segmentation_path", "checkpoints/face_segmentation.pth",
28
- "--no_seg",
29
- "--no_sr",
30
- "--face", video_path,
31
- "--audio", audio_path,
32
- "--outfile", output_path,
33
- "--resize_factor", "2",
34
- "--face_det_batch_size", "4",
35
- "--wav2lip_batch_size", "64",
36
- "--fps", "30",
37
-
38
- "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right)
39
  ]
40
 
41
  if no_smooth:
42
  args.append("--nosmooth")
43
-
44
  if save_as_video:
45
  args.append("--save_as_video")
46
 
@@ -59,31 +154,72 @@ def generate(video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bo
59
  print(f"Выходной файл создан по пути: {output_path}")
60
  return output_path
61
 
62
- with gr.Blocks() as ui:
63
- gr.Markdown("## Lypsinc")
 
 
 
 
 
 
 
 
 
 
 
64
  with gr.Row():
65
- video = gr.File(label="Видео или Изображение", type="filepath")
66
- audio = gr.File(label="Аудио", type="filepath")
67
  with gr.Column():
68
- checkpoint = gr.Radio(["wav2lip", "wav2lip_gan"], label="Чекпойнт", value="wav2lip_gan", visible=False)
69
- no_smooth = gr.Checkbox(label="Без сглаживания", value=False)
70
- resize_factor = gr.Slider(minimum=1, maximum=4, step=1, label="Фактор изменения размера", value=2)
71
- with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  with gr.Column():
 
 
73
  pad_top = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ сверху")
74
  pad_bottom = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Отступ снизу")
75
  pad_left = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ слева")
76
  pad_right = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ справа")
77
- save_as_video = gr.Checkbox(label="Сохранять как видео", value=True)
78
- generate_btn = gr.Button("Сгенерировать")
79
- with gr.Column():
80
- result = gr.Video(label="Результат")
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- generate_btn.click(
83
- generate,
84
- inputs=[video, audio, checkpoint, no_smooth, resize_factor, pad_top, pad_bottom, pad_left, pad_right, save_as_video],
85
- outputs=result,
86
- concurrency_limit=30
87
  )
88
 
89
- ui.launch(debug=True)
 
 
 
 
1
  import os
2
+ import subprocess
3
+ import sys
4
+ import uuid
5
+ import gradio as gr
6
+ from pydub import AudioSegment
7
+ from TTS.api import TTS
8
+
9
# Load the multilingual XTTS v2 model once at import time; synthesize_speech()
# reuses this instance for every request.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
# tts.to("cuda")

# Dropdown label -> language code handed to XTTS via ``language=``.
language_options = {
    "English (en)": "en",
    "Spanish (es)": "es",
    "French (fr)": "fr",
    "German (de)": "de",
    "Italian (it)": "it",
    "Portuguese (pt)": "pt",
    "Polish (pl)": "pl",
    "Turkish (tr)": "tr",
    "Russian (ru)": "ru",
    "Dutch (nl)": "nl",
    "Czech (cs)": "cs",
    "Arabic (ar)": "ar",
    "Chinese (zh-cn)": "zh-cn",
    "Japanese (ja)": "ja",
    "Hungarian (hu)": "hu",
    "Korean (ko)": "ko",
    "Hindi (hi)": "hi",
}

# Dropdown label -> code interpolated into ``tts_models/<code>/fairseq/vits``
# by synthesize_and_convert_voice().
other_language = {
    "Vietnamese": "vie",
    "Serbian": "srp",
    "Romanian": "ron",
    "Indonesian": "ind",
    "Philippine": "tgl",
}
41
+
42
def clean_audio(audio_path):
    """Band-limit a recording and trim silence from both ends using ffmpeg.

    The cleaned audio is written to a uniquely named WAV file under
    ``output/``. Cleaning is best effort: if ffmpeg is missing, fails, or the
    output directory cannot be created, the problem is logged and the
    original ``audio_path`` is returned unchanged.

    Args:
        audio_path: Path of the audio file to clean.

    Returns:
        Path of the cleaned file on success, otherwise ``audio_path``.
    """
    out_filename = f"output/cleaned_{uuid.uuid4()}.wav"

    # silenceremove only strips *leading* silence, so the clip is reversed,
    # trimmed, reversed back and trimmed again to cover both ends.
    trim_leading = "silenceremove=start_periods=1:start_silence=0:start_threshold=0.02"
    audio_filters = ",".join(
        ["lowpass=8000", "highpass=75", "areverse", trim_leading, "areverse", trim_leading]
    )

    # Build argv as a list (never by splitting a formatted string) so paths
    # containing whitespace reach ffmpeg as a single argument.
    command = ["ffmpeg", "-y", "-i", str(audio_path), "-af", audio_filters, out_filename]

    try:
        # ffmpeg does not create missing parent directories for its output.
        os.makedirs("output", exist_ok=True)
        subprocess.run(command, capture_output=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error during audio cleaning: {e}")
        if e.stderr:
            print(e.stderr.decode(errors="replace"))
        return audio_path
    except OSError as e:
        # Raised when the ffmpeg binary is not installed or the output
        # directory cannot be created.
        print(f"Error during audio cleaning: {e}")
        return audio_path

    print(f"Audio cleaned and saved to {out_filename}")
    return out_filename
54
+
55
def check_audio_length(audio_path, max_duration=120):
    """Report whether an audio file is no longer than ``max_duration`` seconds.

    Args:
        audio_path: Path of the audio file to inspect.
        max_duration: Maximum accepted length in seconds.

    Returns:
        ``True`` when the file could be read and fits the limit; ``False``
        when it is too long or when any error occurs while reading it.
    """
    try:
        seconds = AudioSegment.from_file(audio_path).duration_seconds
        within_limit = not seconds > max_duration
        if not within_limit:
            print(f"Audio is too long: {seconds} seconds. Max allowed is {max_duration} seconds.")
        return within_limit
    except Exception as e:
        # Unreadable input is reported the same way as over-long input.
        print(f"Error while checking audio length: {e}")
        return False
66
+
67
def synthesize_and_convert_voice(text, language_iso, voice_audio_path, speed):
    """Synthesize ``text`` with a fairseq VITS model, then convert the voice.

    A ``tts_models/<language_iso>/fairseq/vits`` model produces the speech and
    the FreeVC24 voice-conversion model re-voices it towards the recording at
    ``voice_audio_path``. Both models are instantiated on every call.

    Args:
        text: Text to speak.
        language_iso: Language code used to select the fairseq VITS model.
        voice_audio_path: Path of the target-voice recording.
        speed: Speed value forwarded to the synthesis call.

    Returns:
        Path of the generated audio file (always ``output/docout.wav``).
    """
    vits_model = TTS(model_name=f"tts_models/{language_iso}/fairseq/vits")
    raw_speech = vits_model.tts(text, speed=speed)

    voice_converter = TTS(
        model_name="voice_conversion_models/multilingual/vctk/freevc24",
        progress_bar=False,
    )

    os.makedirs("output", exist_ok=True)
    output_file = "output/docout.wav"
    voice_converter.voice_conversion_to_file(
        raw_speech,
        target_wav=voice_audio_path,
        file_path=output_file,
    )
    return output_file
76
+
77
def synthesize_speech(text, speaker_wav_path, language_iso, speed):
    """Synthesize ``text`` with XTTS in the speaker's voice, then refine it.

    The module-level XTTS model writes an intermediate file, which the FreeVC24
    voice-conversion model then converts towards the same speaker recording.

    Args:
        text: Text to speak.
        speaker_wav_path: Path of the reference speaker recording.
        language_iso: Language code passed to XTTS as ``language``.
        speed: Speed value forwarded to XTTS.

    Returns:
        Path of the generated audio file (always ``output/docout.wav``).
    """
    output_file_xtts = "output/undocout.wav"
    output_file = "output/docout.wav"

    # Create the directory BEFORE the first write: XTTS saves its intermediate
    # result here, and a fresh checkout may not have the folder yet.
    os.makedirs("output", exist_ok=True)

    tts.tts_to_file(
        text=text,
        file_path=output_file_xtts,
        speed=speed,
        speaker_wav=speaker_wav_path,
        language=language_iso,
    )

    tts_conversion = TTS(
        model_name="voice_conversion_models/multilingual/vctk/freevc24",
        progress_bar=False,
    )
    tts_conversion.voice_conversion_to_file(
        output_file_xtts,
        target_wav=speaker_wav_path,
        file_path=output_file,
    )
    return output_file
87
 
88
def get_language_code(selected_language):
    """Translate a dropdown label into its language code.

    ``language_options`` is consulted first, then ``other_language``.

    Args:
        selected_language: Label chosen in the language dropdown.

    Returns:
        The matching language code, or ``None`` if the label is unknown.
    """
    for table in (language_options, other_language):
        if selected_language in table:
            return table[selected_language]
    return None
95
 
96
def process_speech(text, speaker_wav, selected_language, speed):
    """Validate the inputs and generate speech in the cloned voice.

    Args:
        text: Text to speak.
        speaker_wav: Path of the uploaded speaker recording, or ``None`` when
            nothing was uploaded.
        selected_language: Label chosen in the language dropdown.
        speed: Synthesis speed forwarded to the TTS backend.

    Returns:
        Path of the generated audio file.

    Raises:
        ValueError: If ``selected_language`` is not a known label.
        gr.Error: If the text is empty, no speaker audio was provided, or the
            speaker audio failed the length check.
    """
    language_code = get_language_code(selected_language)
    if language_code is None:
        raise ValueError("Выбранный язык не поддерживается.")

    # Reject empty input early instead of sending it to the TTS model.
    if not text or not text.strip():
        raise gr.Error("Пожалуйста, введите текст для генерации.", duration=5)

    # Without this guard a missing upload falls through to the length check,
    # which reports it as "audio too long" and misleads the user.
    if not speaker_wav:
        raise gr.Error("Пожалуйста, загрузите аудио говорящего.", duration=5)

    # Enforce the reference-audio length limit.
    if not check_audio_length(speaker_wav):
        error_message = "Длина аудио превышает допустимый лимит в 2 минуты."
        raise gr.Error(error_message, duration=5)

    cleaned_wav_path = clean_audio(speaker_wav)

    # Languages in ``other_language`` use the fairseq VITS + voice-conversion
    # pipeline; everything else goes through XTTS.
    if selected_language in other_language:
        return synthesize_and_convert_voice(text, language_code, cleaned_wav_path, speed)
    return synthesize_speech(text, cleaned_wav_path, language_code, speed)
116
+
117
+ def generate_lipsync(video_path, audio_path, pad_top, pad_bottom, pad_left, pad_right, no_smooth, save_as_video):
118
  output_dir = "outputs"
119
  os.makedirs(output_dir, exist_ok=True)
 
120
  output_path = os.path.join(output_dir, "output.mp4")
 
121
 
122
  args = [
123
+ "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
124
+ "--segmentation_path", "checkpoints/face_segmentation.pth",
125
+ "--no_seg",
126
+ "--no_sr",
127
+ "--face", video_path,
128
+ "--audio", audio_path,
129
+ "--outfile", output_path,
130
+ "--resize_factor", "2",
131
+ "--face_det_batch_size", "4",
132
+ "--wav2lip_batch_size", "64",
133
+ "--fps", "30",
134
+ "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right)
 
135
  ]
136
 
137
  if no_smooth:
138
  args.append("--nosmooth")
 
139
  if save_as_video:
140
  args.append("--save_as_video")
141
 
 
154
  print(f"Выходной файл создан по пути: {output_path}")
155
  return output_path
156
 
157
def process_all(text, speaker_wav, selected_language, speed, video, pad_top, pad_bottom, pad_left, pad_right, no_smooth, save_as_video):
    """Run the full pipeline: cloned-voice speech, then lip-synced video.

    Args:
        text, speaker_wav, selected_language, speed: Forwarded to
            ``process_speech``.
        video: Path of the face video or image (passed through as-is).
        pad_top, pad_bottom, pad_left, pad_right, no_smooth, save_as_video:
            Forwarded to ``generate_lipsync``.

    Returns:
        Whatever ``generate_lipsync`` returns for the generated video.
    """
    # Step 1: generate the audio track in the cloned voice.
    cloned_audio = process_speech(text, speaker_wav, selected_language, speed)

    # Step 2: lip-sync the video to that audio.
    return generate_lipsync(
        video,
        cloned_audio,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        no_smooth,
        save_as_video,
    )
166
+
167
# NOTE(review): the layout nesting below was reconstructed from a flattened
# diff view — confirm it against the rendered UI.
with gr.Blocks() as demo:
    gr.Markdown("# Объединение Voice Clone и Lipsync")

    with gr.Row():
        # Left column: speech-synthesis settings.
        with gr.Column():
            gr.Markdown("### Шаг 1: Настройки синтеза речи")
            text_input = gr.Textbox(
                label="Введите текст для генерации",
                placeholder="Введите ваш текст здесь...",
            )
            speaker_wav_input = gr.Audio(
                label="Загрузите аудио говорящего (WAV формат)",
                type="filepath",
            )

            # XTTS languages first, then the fairseq-only ones.
            all_languages = [*language_options, *other_language]
            language_input = gr.Dropdown(
                choices=all_languages,
                value="English (en)",
                label="Язык",
            )

            speed_input = gr.Slider(
                minimum=0.1,
                maximum=10,
                step=0.1,
                value=1.0,
                label="Скорость синтеза",
                info="Выберите скорость",
            )

        # Right column: lip-sync settings.
        with gr.Column():
            gr.Markdown("### Шаг 2: Настройки липсинка")
            video_input = gr.File(label="Видео или Изображение", type="filepath")
            pad_top = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ сверху")
            pad_bottom = gr.Slider(minimum=0, maximum=50, step=1, value=10, label="Отступ снизу")
            pad_left = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ слева")
            pad_right = gr.Slider(minimum=0, maximum=50, step=1, value=0, label="Отступ справа")
            no_smooth = gr.Checkbox(label="Без сглаживания", value=False)
            save_as_video = gr.Checkbox(label="Сохранять как видео", value=True)

    output_video = gr.Video(label="Сгенерированное видео")

    with gr.Row():
        generate_button = gr.Button("Сгенерировать")
        # Fixed-width spacer between the two buttons.
        gr.HTML("<div style='width:300px;'></div>")
        reload_button = gr.Button("Перезапустить")

    pipeline_inputs = [
        text_input,
        speaker_wav_input,
        language_input,
        speed_input,
        video_input,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        no_smooth,
        save_as_video,
    ]
    generate_button.click(fn=process_all, inputs=pipeline_inputs, outputs=output_video)

    # Terminates the process immediately; presumably an external supervisor
    # restarts the app afterwards — TODO confirm.
    reload_button.click(fn=lambda: os._exit(0), inputs=None, outputs=None)
216
 
217
def launch_gradio():
    """Start the Gradio app on all interfaces, port 8600, with debug enabled."""
    launch_options = {
        "debug": True,
        "server_port": 8600,
        "server_name": "0.0.0.0",
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    launch_gradio()