Spaces:

ResembleAI
/

Chatterbox

Running on Zero

App Files Community

app.py

#15

by Vanjo - opened Jul 4

base: refs/heads/main

←

from: refs/pr/15

Discussion Files changed

+3 -3

Files changed (1) hide show

app.py +3 -3

app.py CHANGED Viewed

@@ -59,7 +59,7 @@ def generate_tts_audio(
     maintains the prosody, tone, and vocal qualities of the reference speaker, or uses default voice if no reference is provided.
     Args:
-        text_input (str): The text to synthesize into speech (maximum 300 characters)
         audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
         exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
         temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
@@ -90,7 +90,7 @@ def generate_tts_audio(
         generate_kwargs["audio_prompt_path"] = audio_prompt_path_input
     wav = current_model.generate(
-        text_input[:300],  # Truncate text to max chars
         **generate_kwargs
     )
     print("Audio generation complete.")
@@ -107,7 +107,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             text = gr.Textbox(
                 value="Now let's make my mum's favourite. So three mars bars into the pan. Then we add the tuna and just stir for a bit, just let the chocolate and fish infuse. A sprinkle of olive oil and some tomato ketchup. Now smell that. Oh boy this is going to be incredible.",
-                label="Text to synthesize (max chars 300)",
                 max_lines=5
             )
             ref_wav = gr.Audio(

     maintains the prosody, tone, and vocal qualities of the reference speaker, or uses default voice if no reference is provided.
     Args:
+        text_input (str): The text to synthesize into speech (maximum 1000 characters)
         audio_prompt_path_input (str, optional): File path or URL to the reference audio file that defines the target voice style. Defaults to None.
         exaggeration_input (float, optional): Controls speech expressiveness (0.25-2.0, neutral=0.5, extreme values may be unstable). Defaults to 0.5.
         temperature_input (float, optional): Controls randomness in generation (0.05-5.0, higher=more varied). Defaults to 0.8.
         generate_kwargs["audio_prompt_path"] = audio_prompt_path_input
     wav = current_model.generate(
+        text_input[:1000],  # Truncate text to max chars
         **generate_kwargs
     )
     print("Audio generation complete.")
         with gr.Column():
             text = gr.Textbox(
                 value="Now let's make my mum's favourite. So three mars bars into the pan. Then we add the tuna and just stir for a bit, just let the chocolate and fish infuse. A sprinkle of olive oil and some tomato ketchup. Now smell that. Oh boy this is going to be incredible.",
+                label="Text to synthesize (max chars 1000)",
                 max_lines=5
             )
             ref_wav = gr.Audio(