mrfakename committed on
Commit
5f635fb
·
1 Parent(s): 5c81b55
Files changed (2) hide show
  1. app.py +5 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import spaces
2
  import torch
3
  import os
 
4
  import gradio as gr
5
  import traceback
6
  from huggingface_hub import snapshot_download
@@ -10,7 +11,7 @@ from tts.infer_cli import MegaTTS3DiTInfer, convert_to_wav, cut_wav
10
  def download_weights():
11
  """Download model weights from HuggingFace if not already present."""
12
  repo_id = "mrfakename/MegaTTS3-VoiceCloning"
13
- weights_dir = "weights"
14
 
15
  if not os.path.exists(weights_dir):
16
  print("Downloading model weights from HuggingFace...")
@@ -62,7 +63,9 @@ def generate_speech(inp_audio, inp_text, infer_timestep, p_w, t_w):
62
 
63
 
64
  with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
65
- gr.Markdown("# MegaTTS3 Voice Cloning")
 
 
66
  gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
67
 
68
  with gr.Row():
 
1
  import spaces
2
  import torch
3
  import os
4
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
5
  import gradio as gr
6
  import traceback
7
  from huggingface_hub import snapshot_download
 
11
  def download_weights():
12
  """Download model weights from HuggingFace if not already present."""
13
  repo_id = "mrfakename/MegaTTS3-VoiceCloning"
14
+ weights_dir = "checkpoints"
15
 
16
  if not os.path.exists(weights_dir):
17
  print("Downloading model weights from HuggingFace...")
 
63
 
64
 
65
  with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
66
+ gr.Markdown("# MegaTTS 3 Voice Cloning")
67
+ gr.Markdown("MegaTTS 3 is a text-to-speech model trained by ByteDance with exceptional voice cloning capabilities. The original authors did not release the WavVAE encoder, so voice cloning was not publicly available; however, thanks to [@ACoderPassBy](https://modelscope.cn/models/ACoderPassBy/MegaTTS-SFT)'s WavVAE encoder, we can now clone voices with MegaTTS 3!")
68
+ gr.Markdown("h/t to MysteryShack on Discord for the info about the unofficial WavVAE encoder!")
69
  gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
70
 
71
  with gr.Row():
requirements.txt CHANGED
@@ -16,3 +16,4 @@ torchdiffeq==0.2.5
16
  openai-whisper==20240930
17
  httpx==0.28.1
18
  gradio==5.23.1
 
 
16
  openai-whisper==20240930
17
  httpx==0.28.1
18
  gradio==5.23.1
19
+ hf-transfer