Spaces: ResembleAI/Chatterbox

Running on Zero

MCP-compatible

#12

by victor HF Staff - opened 24 days ago

←

Files changed (1) hide show

app.py CHANGED Viewed

@@ -52,18 +52,22 @@ def generate_tts_audio(
     cfgw_input: float
 ) -> tuple[int, np.ndarray]:
     """
-    Generates TTS audio using the ChatterboxTTS model.
     Args:
-        text_input: The text to synthesize (max 300 characters).
-        audio_prompt_path_input: Path to the reference audio file.
-        exaggeration_input: Exaggeration parameter for the model.
-        temperature_input: Temperature parameter for the model.
-        seed_num_input: Random seed (0 for random).
-        cfgw_input: CFG/Pace weight.
     Returns:
-        A tuple containing the sample rate (int) and the audio waveform (numpy.ndarray).
     """
     current_model = get_or_load_model()
@@ -133,4 +137,4 @@ with gr.Blocks() as demo:
         outputs=[audio_output],
     )
-demo.launch()

     cfgw_input: float
 ) -> tuple[int, np.ndarray]:
     """
+    Generate high-quality speech audio from text using the ChatterboxTTS model with reference audio styling.
+    This tool synthesizes natural-sounding speech from input text, using a reference audio file
+    to capture the speaker's voice characteristics and speaking style. The generated audio
+    maintains the prosody, tone, and vocal qualities of the reference speaker.
     Args:
+        text_input (str): The text to synthesize into speech (maximum 300 characters)
+        audio_prompt_path_input (str): File path or URL to the reference audio file that defines the target voice style
+        exaggeration_input (float): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable)
+        temperature_input (float): Controls randomness in generation (0.05-5.0, higher=more varied, default=0.8)
+        seed_num_input (int): Random seed for reproducible results (0 for random generation)
+        cfgw_input (float): CFG/Pace weight controlling generation guidance (0.2-1.0, default=0.5)
     Returns:
+        tuple[int, np.ndarray]: A tuple containing the sample rate (int) and the generated audio waveform (numpy.ndarray)
     """
     current_model = get_or_load_model()
         outputs=[audio_output],
     )
+demo.launch(mcp_server=True)