MCP-compatible

#12
Opened by victor (HF Staff)
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -52,18 +52,22 @@ def generate_tts_audio(
52
  cfgw_input: float
53
  ) -> tuple[int, np.ndarray]:
54
  """
55
- Generates TTS audio using the ChatterboxTTS model.
 
 
 
 
56
 
57
  Args:
58
- text_input: The text to synthesize (max 300 characters).
59
- audio_prompt_path_input: Path to the reference audio file.
60
- exaggeration_input: Exaggeration parameter for the model.
61
- temperature_input: Temperature parameter for the model.
62
- seed_num_input: Random seed (0 for random).
63
- cfgw_input: CFG/Pace weight.
64
 
65
  Returns:
66
- A tuple containing the sample rate (int) and the audio waveform (numpy.ndarray).
67
  """
68
  current_model = get_or_load_model()
69
 
@@ -133,4 +137,4 @@ with gr.Blocks() as demo:
133
  outputs=[audio_output],
134
  )
135
 
136
- demo.launch()
 
52
  cfgw_input: float
53
  ) -> tuple[int, np.ndarray]:
54
  """
55
+ Generate high-quality speech audio from text using the ChatterboxTTS model with reference audio styling.
56
+
57
+ This tool synthesizes natural-sounding speech from input text, using a reference audio file
58
+ to capture the speaker's voice characteristics and speaking style. The generated audio
59
+ maintains the prosody, tone, and vocal qualities of the reference speaker.
60
 
61
  Args:
62
+ text_input (str): The text to synthesize into speech (maximum 300 characters)
63
+ audio_prompt_path_input (str): File path or URL to the reference audio file that defines the target voice style
64
+ exaggeration_input (float): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable)
65
+ temperature_input (float): Controls randomness in generation (0.05-5.0, higher=more varied, default=0.8)
66
+ seed_num_input (int): Random seed for reproducible results (0 for random generation)
67
+ cfgw_input (float): CFG/Pace weight controlling generation guidance (0.2-1.0, default=0.5)
68
 
69
  Returns:
70
+ tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
71
  """
72
  current_model = get_or_load_model()
73
 
 
137
  outputs=[audio_output],
138
  )
139
 
140
+ demo.launch(mcp_server=True)