Update app.py
app.py
CHANGED
@@ -1,13 +1,29 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, BarkModel, AutoProcessor
+import torch
+from optimum.bettertransformer import BetterTransformer
 import numpy as np
 
-# Initialize the text-to-speech pipeline
-synthesiser = pipeline("text-to-speech", model="suno/bark")
+# Check for GPU availability
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+# Initialize the text-to-speech pipeline with the smaller model and fp16
+synthesiser = pipeline(
+    "text-to-speech",
+    model="suno/bark-small",  # Use smaller model for faster inference
+    device=device,
+    torch_dtype=torch.float16,  # Enable half-precision for speed
+)
+
+# Convert model to BetterTransformer for kernel fusion
+synthesiser.model = BetterTransformer.transform(synthesiser.model, keep_original_model=False)
+
+# Optional: Enable CPU offloading for low VRAM (uncomment if needed)
+# synthesiser.model.enable_cpu_offload()
 
 def generate_speech(text):
     # Generate speech with the provided text
-    speech = synthesiser(text, forward_params={"do_sample": True})
+    speech = synthesiser(text, forward_params={"do_sample": True, "fine_temperature": 0.4, "coarse_temperature": 0.8})
     # Return audio data and sampling rate for Gradio
     return speech["sampling_rate"], speech["audio"]
 
@@ -16,8 +32,8 @@ iface = gr.Interface(
     fn=generate_speech,
     inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
     outputs=gr.Audio(type="numpy"),
-    title="Text-to-Speech with Suno/Bark",
-    description="Enter text to generate speech using the Suno/Bark model."
+    title="Text-to-Speech with Suno/Bark-Small",
+    description="Enter text to generate speech using the optimized Suno/Bark-Small model."
 )
 
 # Launch the app
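One caveat with the committed settings: torch_dtype=torch.float16 is passed unconditionally, while device falls back to "cpu" when no GPU is detected, and half precision on CPU is generally very slow or unsupported for Bark. A minimal sketch of a device-matched dtype (an assumption for CPU-only hosts, not part of this commit):

import torch
from transformers import pipeline

# Assumption (not part of this commit): match the dtype to the device so that
# CPU-only hosts fall back to fp32 instead of forcing half precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

synthesiser = pipeline(
    "text-to-speech",
    model="suno/bark-small",
    device=device,
    torch_dtype=dtype,
)

With that guard in place, the rest of app.py (the BetterTransformer conversion, generate_speech, and the Gradio interface) can stay as committed.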