sablab committed on
Commit
c557bcd
·
verified ·
1 Parent(s): 3076884

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -1,13 +1,29 @@
1
# Web UI framework and the Hugging Face inference pipeline factory.
import gradio as gr
from transformers import pipeline

import numpy as np

# Build the Bark text-to-speech pipeline once, at module import time,
# so every request reuses the already-loaded model.
synthesiser = pipeline(task="text-to-speech", model="suno/bark")
8
def generate_speech(text):
    """Synthesize speech for ``text`` with the module-level Bark pipeline.

    Returns a ``(sampling_rate, audio)`` tuple in the layout that
    ``gr.Audio(type="numpy")`` expects: an int rate and a 1-D sample array.
    """
    # do_sample=True makes Bark's output non-deterministic but more natural.
    speech = synthesiser(text, forward_params={"do_sample": True})
    # The pipeline returns audio shaped (1, n_samples); Gradio would read
    # that as one frame with n_samples channels, so drop the leading axis.
    audio = np.squeeze(speech["audio"])
    return speech["sampling_rate"], audio
13
 
@@ -16,8 +32,8 @@ iface = gr.Interface(
16
  fn=generate_speech,
17
  inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
18
  outputs=gr.Audio(type="numpy"),
19
- title="Text-to-Speech with Suno/Bark",
20
- description="Enter text to generate speech using the Suno/Bark model."
21
  )
22
 
23
  # Launch the app
 
1
import gradio as gr
from transformers import pipeline, BarkModel, AutoProcessor
import torch
from optimum.bettertransformer import BetterTransformer
import numpy as np

# Prefer the first CUDA device when one is available; otherwise run on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# fp16 roughly halves memory and latency on GPU, but many CPU kernels have
# no half-precision implementation — forcing float16 on CPU crashes or
# produces garbage, so fall back to full precision there.
dtype = torch.float16 if device.startswith("cuda") else torch.float32

# Initialize the text-to-speech pipeline with the smaller Bark checkpoint
# for faster inference; load once at import time so requests reuse it.
synthesiser = pipeline(
    "text-to-speech",
    model="suno/bark-small",  # smaller model -> faster inference
    device=device,
    torch_dtype=dtype,
)

# BetterTransformer fuses attention kernels for extra speed. It is purely an
# optimization and can raise on some transformers/optimum version pairs, so
# degrade gracefully to the unfused model instead of failing at startup.
try:
    synthesiser.model = BetterTransformer.transform(
        synthesiser.model, keep_original_model=False
    )
except Exception:
    # NOTE(review): best-effort optimization only — keep the plain model.
    pass

# Optional: Enable CPU offloading for low VRAM (uncomment if needed)
# synthesiser.model.enable_cpu_offload()
 
24
def generate_speech(text):
    """Synthesize speech for ``text`` with the module-level Bark pipeline.

    Returns a ``(sampling_rate, audio)`` tuple in the layout that
    ``gr.Audio(type="numpy")`` expects: an int rate and a 1-D sample array.
    """
    # Sampling plus the tuned temperatures trades determinism for more
    # natural-sounding speech.
    speech = synthesiser(
        text,
        forward_params={"do_sample": True, "fine_temperature": 0.4, "coarse_temperature": 0.8},
    )
    # The pipeline returns audio shaped (1, n_samples); Gradio would read
    # that as one frame with n_samples channels, so drop the leading axis.
    audio = np.squeeze(speech["audio"])
    return speech["sampling_rate"], audio
29
 
 
32
  fn=generate_speech,
33
  inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
34
  outputs=gr.Audio(type="numpy"),
35
+ title="Text-to-Speech with Suno/Bark-Small",
36
+ description="Enter text to generate speech using the optimized Suno/Bark-Small model."
37
  )
38
 
39
  # Launch the app