Update app.py
app.py
CHANGED
@@ -1,13 +1,29 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, BarkModel, AutoProcessor
+import torch
+from optimum.bettertransformer import BetterTransformer
 import numpy as np
 
-# Initialize the text-to-speech pipeline
-synthesiser = pipeline("text-to-speech", model="suno/bark")
+# Check for GPU availability
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+# Initialize the text-to-speech pipeline with the smaller model and fp16
+synthesiser = pipeline(
+    "text-to-speech",
+    model="suno/bark-small",  # Use smaller model for faster inference
+    device=device,
+    torch_dtype=torch.float16,  # Enable half-precision for speed
+)
+
+# Convert model to BetterTransformer for kernel fusion
+synthesiser.model = BetterTransformer.transform(synthesiser.model, keep_original_model=False)
+
+# Optional: Enable CPU offloading for low VRAM (uncomment if needed)
+# synthesiser.model.enable_cpu_offload()
 
 def generate_speech(text):
     # Generate speech with the provided text
-    speech = synthesiser(text, forward_params={"do_sample": True})
+    speech = synthesiser(text, forward_params={"do_sample": True, "fine_temperature": 0.4, "coarse_temperature": 0.8})
     # Return audio data and sampling rate for Gradio
     return speech["sampling_rate"], speech["audio"]
 
@@ -16,8 +32,8 @@ iface = gr.Interface(
     fn=generate_speech,
     inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
     outputs=gr.Audio(type="numpy"),
-    title="Text-to-Speech with Suno/Bark",
-    description="Enter text to generate speech using the Suno/Bark model."
+    title="Text-to-Speech with Suno/Bark-Small",
+    description="Enter text to generate speech using the optimized Suno/Bark-Small model."
 )
 
 # Launch the app
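One caveat with the committed settings: torch_dtype=torch.float16 is passed unconditionally, while device falls back to "cpu" when no GPU is detected, and half precision on CPU is generally very slow or unsupported for Bark. A minimal sketch of a device-matched dtype (an assumption for CPU-only hosts, not part of this commit):

import torch
from transformers import pipeline

# Assumption (not part of this commit): match the dtype to the device so that
# CPU-only hosts fall back to fp32 instead of forcing half precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

synthesiser = pipeline(
    "text-to-speech",
    model="suno/bark-small",
    device=device,
    torch_dtype=dtype,
)

With that guard in place, the rest of app.py (the BetterTransformer conversion, generate_speech, and the Gradio interface) can stay as committed.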