AiMimicry committed on
Commit
d99c32b
·
1 Parent(s): cc460d7

Update app.py (remove the edge-tts text-to-speech input mode)

Browse files
Files changed (1) hide show
  1. app.py +2 -24
app.py CHANGED
@@ -9,7 +9,6 @@ import logging
9
  import soundfile
10
  import asyncio
11
  import argparse
12
- import edge_tts
13
  import gradio.processing_utils as gr_processing_utils
14
  logging.getLogger('numba').setLevel(logging.WARNING)
15
  logging.getLogger('markdown_it').setLevel(logging.WARNING)
@@ -29,21 +28,7 @@ def audio_postprocess(self, y):
29
 
30
  gr.Audio.postprocess = audio_postprocess
31
  def create_vc_fn(model, sid):
32
- def vc_fn(input_audio, vc_transform, auto_f0, tts_text, tts_voice, tts_mode):
33
- if tts_mode:
34
- if len(tts_text) > 100 and limitation:
35
- return "Text is too long", None
36
- if tts_text is None or tts_voice is None:
37
- return "You need to enter text and select a voice", None
38
- asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
39
- audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
40
- raw_path = io.BytesIO()
41
- soundfile.write(raw_path, audio, 16000, format="wav")
42
- raw_path.seek(0)
43
- out_audio, out_sr = model.infer(sid, vc_transform, raw_path,
44
- auto_predict_f0=auto_f0,
45
- )
46
- return "Success", (44100, out_audio.cpu().numpy())
47
  if input_audio is None:
48
  return "You need to upload an audio", None
49
  sampling_rate, audio = input_audio
@@ -73,9 +58,6 @@ if __name__ == '__main__':
73
  hubert_model = utils.get_hubert_model().to(args.device)
74
  models = []
75
  voices = []
76
- tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
77
- for r in tts_voice_list:
78
- voices.append(f"{r['ShortName']}-{r['Gender']}")
79
  for f in os.listdir("models"):
80
  name = f
81
  model = Svc(fr"models/{f}/{f}.pth", f"models/{f}/config.json", device=args.device)
@@ -102,14 +84,10 @@ if __name__ == '__main__':
102
  vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
103
  vc_transform = gr.Number(label="vc_transform", value=0)
104
  auto_f0 = gr.Checkbox(label="auto_f0", value=False)
105
- tts_mode = gr.Checkbox(label="tts (use edge-tts as input)", value=False)
106
- tts_text = gr.Textbox(visible=False, label="TTS text (100 words limitation)" if limitation else "TTS text")
107
- tts_voice = gr.Dropdown(choices=voices, visible=False)
108
- vc_submit = gr.Button("Generate", variant="primary")
109
  with gr.Column():
110
  vc_output1 = gr.Textbox(label="Output Message")
111
  vc_output2 = gr.Audio(label="Output Audio")
112
- vc_submit.click(vc_fn, [vc_input, vc_transform, auto_f0, tts_text, tts_voice, tts_mode], [vc_output1, vc_output2])
113
 
114
  """
115
  for category, link in others.items():
 
9
  import soundfile
10
  import asyncio
11
  import argparse
 
12
  import gradio.processing_utils as gr_processing_utils
13
  logging.getLogger('numba').setLevel(logging.WARNING)
14
  logging.getLogger('markdown_it').setLevel(logging.WARNING)
 
28
 
29
  gr.Audio.postprocess = audio_postprocess
30
  def create_vc_fn(model, sid):
31
+ def vc_fn(input_audio, vc_transform, auto_f0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  if input_audio is None:
33
  return "You need to upload an audio", None
34
  sampling_rate, audio = input_audio
 
58
  hubert_model = utils.get_hubert_model().to(args.device)
59
  models = []
60
  voices = []
 
 
 
61
  for f in os.listdir("models"):
62
  name = f
63
  model = Svc(fr"models/{f}/{f}.pth", f"models/{f}/config.json", device=args.device)
 
84
  vc_input = gr.Audio(label="Input audio"+' (less than 20 seconds)' if limitation else '')
85
  vc_transform = gr.Number(label="vc_transform", value=0)
86
  auto_f0 = gr.Checkbox(label="auto_f0", value=False)
 
 
 
 
87
  with gr.Column():
88
  vc_output1 = gr.Textbox(label="Output Message")
89
  vc_output2 = gr.Audio(label="Output Audio")
90
+ vc_submit.click(vc_fn, [vc_input, vc_transform, auto_f0], [vc_output1, vc_output2])
91
 
92
  """
93
  for category, link in others.items():