Spaces:
Running
Running
Commit
·
5f635fb
1
Parent(s):
5c81b55
update
Browse files
- app.py +5 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import spaces
|
2 |
import torch
|
3 |
import os
|
|
|
4 |
import gradio as gr
|
5 |
import traceback
|
6 |
from huggingface_hub import snapshot_download
|
@@ -10,7 +11,7 @@ from tts.infer_cli import MegaTTS3DiTInfer, convert_to_wav, cut_wav
|
|
10 |
def download_weights():
|
11 |
"""Download model weights from HuggingFace if not already present."""
|
12 |
repo_id = "mrfakename/MegaTTS3-VoiceCloning"
|
13 |
-
weights_dir = "
|
14 |
|
15 |
if not os.path.exists(weights_dir):
|
16 |
print("Downloading model weights from HuggingFace...")
|
@@ -62,7 +63,9 @@ def generate_speech(inp_audio, inp_text, infer_timestep, p_w, t_w):
|
|
62 |
|
63 |
|
64 |
with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
|
65 |
-
gr.Markdown("#
|
|
|
|
|
66 |
gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
|
67 |
|
68 |
with gr.Row():
|
|
|
1 |
import spaces
|
2 |
import torch
|
3 |
import os
|
4 |
+
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
5 |
import gradio as gr
|
6 |
import traceback
|
7 |
from huggingface_hub import snapshot_download
|
|
|
11 |
def download_weights():
|
12 |
"""Download model weights from HuggingFace if not already present."""
|
13 |
repo_id = "mrfakename/MegaTTS3-VoiceCloning"
|
14 |
+
weights_dir = "checkpoints"
|
15 |
|
16 |
if not os.path.exists(weights_dir):
|
17 |
print("Downloading model weights from HuggingFace...")
|
|
|
63 |
|
64 |
|
65 |
with gr.Blocks(title="MegaTTS3 Voice Cloning") as demo:
|
66 |
+
gr.Markdown("# MegaTTS 3 Voice Cloning")
|
67 |
+
gr.Markdown("MegaTTS 3 is a text-to-speech model trained by ByteDance with exceptional voice cloning capabilities. The original authors did not release the WavVAE encoder, so voice cloning was not publicly available; however, thanks to [@ACoderPassBy](https://modelscope.cn/models/ACoderPassBy/MegaTTS-SFT)'s WavVAE encoder, we can now clone voices with MegaTTS 3!")
|
68 |
+
gr.Markdown("h/t to MysteryShack on Discord for the info about the unofficial WavVAE encoder!")
|
69 |
gr.Markdown("Upload a reference audio clip and enter text to generate speech with the cloned voice.")
|
70 |
|
71 |
with gr.Row():
|
requirements.txt
CHANGED
@@ -16,3 +16,4 @@ torchdiffeq==0.2.5
|
|
16 |
openai-whisper==20240930
|
17 |
httpx==0.28.1
|
18 |
gradio==5.23.1
|
|
|
|
16 |
openai-whisper==20240930
|
17 |
httpx==0.28.1
|
18 |
gradio==5.23.1
|
19 |
+
hf-transfer
|