speech

Paused

File size: 2,060 Bytes

5f06acc
 
 
 
00bbc0d
5f06acc
 
 
 
 
 
 
 
5a75523
03f2172
5f06acc
 
00bbc0d
5f06acc
 
 
 
 
 
 
03f2172
5f06acc
5a75523
dda0cbf
5a75523
 
562ed19
 
 
dda0cbf
562ed19
 
5f06acc
5a75523
 
 
 
 
 
 
562ed19
 
 
 
 
5f06acc
00bbc0d
 
 
 
 
 
 
 
 
 
 
 
562ed19
 
5a75523
562ed19
 
 
00bbc0d
 
 
 
562ed19
5f06acc
00bbc0d

import random 
import gradio as gr 
import numpy as np 
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
import streamlit as st

def pad_buffer(audio):
    """Return ``audio`` zero-padded to a whole number of int16 samples.

    The byte string is extended with NUL bytes until its length is a
    multiple of the int16 item size. Input that is already aligned is
    returned unchanged.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        # Already aligned: nothing to append.
        return audio
    return audio + b'\0' * (sample_width - leftover)

def generate_voice(text, voice_name):
    """Synthesize speech for ``text`` using the given ElevenLabs voice.

    Only the first 250 characters of ``text`` are sent to the API.

    Args:
        text: The text to speak.
        voice_name: Passed through as the ``voice`` argument of ``generate``.

    Returns:
        A ``(sample_rate, samples)`` tuple: the fixed rate 44100 and the
        returned audio bytes viewed as a 1-D ``int16`` NumPy array.

    Raises:
        gr.Error: If the unauthenticated rate limit is hit, or on any other
            failure (with the underlying error text as the message). The
            original exception is attached as ``__cause__``.
    """
    model_name = "eleven_multilingual_v1"
    try:
        audio = generate(
            text[:250],
            voice=voice_name,
            model=model_name,
        )
        # NOTE(review): this treats the returned bytes as raw 16-bit PCM at
        # 44.1 kHz -- confirm against the output format of generate().
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # Surface any other failure in the UI, keeping the cause for logs.
        raise gr.Error(str(e)) from e

# Names of the voices this app offers in its dropdown.
desired_voices = ["Antonio"]

# Fetch the available voices and keep only the names listed above.
all_voices = voices()
filtered_voices = [
    candidate.name
    for candidate in all_voices
    if candidate.name in desired_voices
]

# Free-text input; generate_voice only uses the first 250 characters.
input_text = gr.Textbox(
    label="Input Text (250 characters max)",
    lines=2,
    value="Diamo voce alle tue parole, scrivi qui ciò che vuoi ascoltare!",
    elem_id="input_text",
)

# Voice selector. The initial selection is set with `value`; `default` is
# not the component-API keyword for it.
input_voice = gr.Dropdown(
    choices=filtered_voices,
    value="Antonio",
    label="Voice",
    elem_id="input_voice",
)

# Output player; type="numpy" matches the (sample_rate, ndarray) tuple
# returned by generate_voice.
out_audio = gr.Audio(
    label="Generated Voice",
    type="numpy",
    elem_id="out_audio",
)

# Custom stylesheet passed to the Gradio interface: smaller font for the
# text box and dropdown, and a 90%-wide audio player.
# NOTE(review): these rules target .gr-* class names, while the components
# are given elem_id values -- confirm the selectors match the rendered DOM.
css = """
    .gr-textbox {
        font-size: 12px;
    }
    .gr-dropdown {
        font-size: 12px;
    }
    .gr-audio {
        width: 90%;
    }
"""

# Assemble the UI. The title and description are set on the Interface so
# Gradio renders them itself as part of the app page.
iface = gr.Interface(
    fn=generate_voice,
    inputs=[input_text, input_voice],
    outputs=out_audio,
    live=True,
    theme="Monochrome",
    css=css,
    title="TTS App with ElevenLabs",
    description="Enter your text and choose a voice to generate the speech!",
)

# Route events through Gradio's request queue, then start the server.
# width/height are launch-time display options, not Interface arguments.
iface.queue().launch(width=320, height=480)