kevinwang676 committed on
Commit
e1f204c
·
verified ·
1 Parent(s): aba1e24

Update app_share.py

Browse files
Files changed (1) hide show
  1. app_share.py +53 -36
app_share.py CHANGED
@@ -55,7 +55,9 @@ import ffmpeg
55
 
56
  import random
57
  import numpy as np
58
- from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
 
 
59
 
60
  def pad_buffer(audio):
61
  # Pad buffer to multiple of 2 bytes
@@ -65,21 +67,15 @@ def pad_buffer(audio):
65
  audio = audio + b'\0' * (element_size - (buffer_size % element_size))
66
  return audio
67
 
68
- def generate_voice(text, voice_name):
69
- try:
70
- audio = generate(
71
- text[:250], # Limit to 250 characters
72
- voice=voice_name,
73
- model="eleven_multilingual_v2"
74
- )
75
- with open("output" + ".mp3", mode='wb') as f:
76
- f.write(audio)
77
- return "output.mp3"
78
-
79
- except UnauthenticatedRateLimitError as e:
80
- raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.")
81
- except Exception as e:
82
- raise gr.Error(e)
83
 
84
  html_denoise = """
85
  <html>
@@ -105,7 +101,7 @@ html_denoise = """
105
  </html>
106
  """
107
 
108
- def convert(api_key, text, tgt, voice, save_path):
109
  model = "FreeVC (24kHz)"
110
  with torch.no_grad():
111
  # tgt
@@ -127,8 +123,6 @@ def convert(api_key, text, tgt, voice, save_path):
127
  hps.data.mel_fmax
128
  )
129
  # src
130
-
131
- os.environ["ELEVEN_API_KEY"] = api_key
132
  src = generate_voice(text, voice)
133
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
134
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
@@ -308,7 +302,11 @@ def merge_audios(folder_path):
308
 
309
  import shutil
310
 
311
- def convert_from_srt(apikey, filename, audio_full, voice, multilingual):
 
 
 
 
312
  subtitle_list = read_srt(filename)
313
 
314
  #audio_data, sr = librosa.load(audio_full, sr=44100)
@@ -324,7 +322,7 @@ def convert_from_srt(apikey, filename, audio_full, voice, multilingual):
324
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
325
  print(f"正在合成第{i.index}条语音")
326
  print(f"语音内容:{i.text}")
327
- convert(apikey, i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
328
  except Exception:
329
  pass
330
  else:
@@ -334,7 +332,7 @@ def convert_from_srt(apikey, filename, audio_full, voice, multilingual):
334
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
335
  print(f"正在合成第{i.index}条语音")
336
  print(f"语音内容:{i.text.splitlines()[1]}")
337
- convert(apikey, i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
338
  except Exception:
339
  pass
340
  merge_audios("output")
@@ -345,25 +343,44 @@ restart_markdown = ("""
345
  ### 若此页面无法正常显示,请点击[此链接](https://openxlab.org.cn/apps/detail/Kevin676/OpenAI-TTS)唤醒该程序!谢谢🍻
346
  """)
347
 
348
- all_voices = voices()
 
 
 
 
 
 
 
 
 
 
349
 
350
  with gr.Blocks() as app:
351
  gr.Markdown("# <center>🌊💕🎶 11Labs TTS - SRT文件一键AI配音</center>")
352
  gr.Markdown("### <center>🌟 只需上传SRT文件和原版配音文件即可,每次一集视频AI自动配音!Developed by Kevin Wang </center>")
353
- with gr.Row():
354
- with gr.Column():
355
- inp0 = gr.Textbox(type='password', label='请输入您的11Labs API Key')
356
- inp1 = gr.File(file_count="single", label="请上传一集视频对应的SRT文件")
357
- inp2 = gr.Audio(label="请上传一集视频的配音文件", type="filepath")
358
-
359
- inp3 = gr.Dropdown(choices=[ voice.name for voice in all_voices ], label='请选择一个说话人提供基础音色', info="试听音色链接:https://huggingface.co/spaces/elevenlabs/tts", value='Rachel')
360
- #inp4 = gr.Dropdown(label="请选择用于分离伴奏的模型", info="UVR-HP5去除背景音乐效果更好,但会对人声造成一定的损伤", choices=["UVR-HP2", "UVR-HP5"], value="UVR-HP5")
361
- inp4 = gr.Checkbox(label="SRT文件是否为双语字幕", info="若为双语字幕,请打勾选择(SRT文件中需要先出现中文字幕,后英文字幕;中英字幕各占一行)")
362
- btn = gr.Button("一键开启AI配音吧💕", variant="primary")
363
- with gr.Column():
364
- out1 = gr.Audio(label="为您生成的AI完整配音", type="filepath")
 
365
 
366
- btn.click(convert_from_srt, [inp0, inp1, inp2, inp3, inp4], [out1])
 
 
 
 
 
 
 
 
367
  gr.Markdown("### <center>注意❗:请勿生成会对任何个人或组织造成侵害的内容,请尊重他人的著作权和知识产权。用户对此程序的任何使用行为与程序开发者无关。</center>")
368
  gr.HTML('''
369
  <div class="footer">
 
55
 
56
  import random
57
  import numpy as np
58
+
59
+ from elevenlabs.client import ElevenLabs
60
+
61
 
62
  def pad_buffer(audio):
63
  # Pad buffer to multiple of 2 bytes
 
67
  audio = audio + b'\0' * (element_size - (buffer_size % element_size))
68
  return audio
69
 
70
+
71
+ def generate_voice(text, voice):
72
+
73
+ audio = client.generate(text=text, voice=voice) #response.voices[0]
74
+ audio = b"".join(audio)
75
+ with open("output.mp3", "wb") as f:
76
+ f.write(audio)
77
+ return "output.mp3"
78
+
 
 
 
 
 
 
79
 
80
  html_denoise = """
81
  <html>
 
101
  </html>
102
  """
103
 
104
+ def convert(text, tgt, voice, save_path):
105
  model = "FreeVC (24kHz)"
106
  with torch.no_grad():
107
  # tgt
 
123
  hps.data.mel_fmax
124
  )
125
  # src
 
 
126
  src = generate_voice(text, voice)
127
  wav_src, _ = librosa.load(src, sr=hps.data.sampling_rate)
128
  wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(device)
 
302
 
303
  import shutil
304
 
305
+ def convert_from_srt(api_key, filename, audio_full, voice, multilingual):
306
+
307
+ client = ElevenLabs(
308
+ api_key=api_key, # Defaults to ELEVEN_API_KEY
309
+ )
310
  subtitle_list = read_srt(filename)
311
 
312
  #audio_data, sr = librosa.load(audio_full, sr=44100)
 
322
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
323
  print(f"正在合成第{i.index}条语音")
324
  print(f"语音内容:{i.text}")
325
+ convert(i.text, f"sliced_audio_{i.index}_0.wav", voice, i.text + " " + str(i.index))
326
  except Exception:
327
  pass
328
  else:
 
332
  trim_audio([[i.start_time, i.end_time]], audio_full, f"sliced_audio_{i.index}")
333
  print(f"正在合成第{i.index}条语音")
334
  print(f"语音内容:{i.text.splitlines()[1]}")
335
+ convert(i.text.splitlines()[1], f"sliced_audio_{i.index}_0.wav", voice, i.text.splitlines()[1] + " " + str(i.index))
336
  except Exception:
337
  pass
338
  merge_audios("output")
 
343
  ### 若此页面无法正常显示,请点击[此链接](https://openxlab.org.cn/apps/detail/Kevin676/OpenAI-TTS)唤醒该程序!谢谢🍻
344
  """)
345
 
346
+ import ffmpeg
347
+
348
def denoise(video_full):
    """Extract the audio track of *video_full* as a stereo 44.1 kHz WAV.

    Parameters
    ----------
    video_full : str
        Path to the uploaded video file (expected to be .mp4).

    Returns
    -------
    str
        Path of the extracted audio file, always ``"audio_full.wav"``.
    """
    target = "audio_full.wav"
    # ffmpeg refuses to overwrite by default, so clear any previous run's
    # output first.
    if os.path.exists(target):
        os.remove(target)
    # ac=2 -> stereo, ar=44100 -> 44.1 kHz sample rate.
    ffmpeg.input(video_full).output(target, ac=2, ar=44100).run()
    return target
356
+
357
 
358
  with gr.Blocks() as app:
359
  gr.Markdown("# <center>🌊💕🎶 11Labs TTS - SRT文件一键AI配音</center>")
360
  gr.Markdown("### <center>🌟 只需上传SRT文件和原版配音文件即可,每次一集视频AI自动配音!Developed by Kevin Wang </center>")
361
+ with gr.Tab("📺视频转音频"):
362
+ with gr.Row():
363
+ inp_video = gr.Video(label="请上传一集包含原声配音的视频", info="需要是.mp4视频文件")
364
+ btn_convert = gr.Button("视频文件转音频", variant="primary")
365
+ out_audio = gr.Audio(label="视频对应的音频文件,可以下载至本地后进行降噪处理", type="filepath")
366
+
367
+ btn_convert.click(denoise, [inp_video], [out_audio])
368
+ with gr.Tab("🎶AI配音"):
369
+ with gr.Row():
370
+ with gr.Column():
371
+ inp0 = gr.Textbox(type='password', label='请输入您的11Labs API Key')
372
+ inp1 = gr.File(file_count="single", label="请上传一集视频对应的SRT文件")
373
+ inp2 = gr.Audio(label="请上传一集视频的配音文件", type="filepath")
374
 
375
+ inp3 = gr.Dropdown(choices=["Rachel", "Alice", "Chris", "Adam"], label='请选择一个说话人提供基础音色', info="试听音色链接:https://elevenlabs.io/app/speech-synthesis", value='Chris')
376
+ #inp4 = gr.Dropdown(label="请选择用于分离伴奏的模型", info="UVR-HP5去除背景音乐效果更好,但会对人声造成一定的损伤", choices=["UVR-HP2", "UVR-HP5"], value="UVR-HP5")
377
+ inp4 = gr.Checkbox(label="SRT文件是否为双语字幕", info="若为双语字幕,请打勾选择(SRT文件中需要先出现中文字幕,后英文字幕;中英字幕各占一行)")
378
+ btn = gr.Button("一键开启AI配音吧💕", variant="primary")
379
+ with gr.Column():
380
+ out1 = gr.Audio(label="为您生成的AI完整配音", type="filepath")
381
+
382
+ btn.click(convert_from_srt, [inp0, inp1, inp2, inp3, inp4], [out1])
383
+
384
  gr.Markdown("### <center>注意❗:请勿生成会对任何个人或组织造成侵害的内容,请尊重他人的著作权和知识产权。用户对此程序的任何使用行为与程序开发者无关。</center>")
385
  gr.HTML('''
386
  <div class="footer">