import random

import gradio as gr
import numpy as np
import streamlit as st
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError


def pad_buffer(audio):
    """Zero-pad raw audio bytes to a whole number of int16 samples.

    ``np.frombuffer(..., dtype=np.int16)`` requires the buffer length to be a
    multiple of the int16 item size, so a trailing partial sample is completed
    with zero bytes.

    Args:
        audio: Raw audio data as a bytes-like object supporting ``+`` with
            ``bytes``.

    Returns:
        ``audio`` unchanged when its length is already aligned, otherwise
        ``audio`` with the missing zero bytes appended.
    """
    element_size = np.dtype(np.int16).itemsize
    remainder = len(audio) % element_size
    if remainder:
        audio = audio + b"\0" * (element_size - remainder)
    return audio


def generate_voice(text, voice_name):
    """Synthesize speech for ``text`` with ElevenLabs for a Gradio audio output.

    Args:
        text: Text to speak; only the first 250 characters are sent.
        voice_name: Name of the ElevenLabs voice to use.

    Returns:
        A ``(sample_rate, samples)`` tuple: ``44100`` and a NumPy ``int16``
        array built from the bytes returned by ``generate``.

    Raises:
        gr.Error: When the unauthenticated rate limit is reached, or for any
            other failure. The original exception is chained as the cause so
            it remains visible in server-side tracebacks.
    """
    model_name = "eleven_multilingual_v1"
    try:
        audio = generate(
            text[:250],  # enforce the 250-character limit advertised in the UI
            voice=voice_name,
            model=model_name,
        )
        # NOTE(review): this assumes `generate` returns raw 16-bit PCM at
        # 44.1 kHz; confirm against the installed elevenlabs version, since
        # compressed output would be misinterpreted here.
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # Surface any other failure to the UI instead of crashing the app.
        raise gr.Error(str(e)) from e


# Ask ElevenLabs for the available voices once, at import time.
all_voices = voices()

# Only voices whose names are listed here are offered in the UI.
desired_voices = ["Antonio"]

filtered_voices = [voice.name for voice in all_voices if voice.name in desired_voices]


# Text to synthesize; generate_voice truncates it to 250 characters.
input_text = gr.Textbox(
    label="Input Text (250 characters max)",
    lines=2,
    value="Diamo voce alle tue parole, scrivi qui ciò che vuoi ascoltare!",
    elem_id="input_text",
)


# Voice selector limited to the filtered voice names.
# `value` (not `default`) is the Gradio 3.x keyword for the initial selection;
# `default` is reported as an unused kwarg and leaves the dropdown empty.
input_voice = gr.Dropdown(
    choices=filtered_voices,
    value="Antonio",
    label="Voice",
    elem_id="input_voice",
)


# Audio player fed by generate_voice's (sample_rate, samples) tuple.
out_audio = gr.Audio(
    label="Generated Voice",
    type="numpy",
    elem_id="out_audio",
)


# Custom stylesheet passed to the Gradio interface.
css = """
.gr-textbox {
    font-size: 12px;
}
.gr-dropdown {
    font-size: 12px;
}
.gr-audio {
    width: 90%;
}
"""


# Wire the text and voice inputs to generate_voice and play back its output.
# `live=True` re-runs the function whenever an input changes.
iface = gr.Interface(
    fn=generate_voice,
    inputs=[input_text, input_voice],
    outputs=out_audio,
    live=True,
    theme="Monochrome",
    css=css,
)

st.title("TTS App with ElevenLabs")
st.text("Enter your text and choose a voice to generate the speech!")

# `concurrency_count` is a queue() option and `width`/`height` are launch()
# options in Gradio 3.x; passed to the Interface constructor they are only
# reported as deprecated/unused. `gr.Interface` has no `_display` method, so
# the interface is started with launch().
iface.queue(concurrency_count=1).launch(width=320, height=480)