# ElevenLabDemo / app.py
# Author: thesab
# Commit: "Update app.py" (0cfe795, verified)
import random
import gradio as gr
import numpy as np
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError
def pad_buffer(audio):
    """Zero-pad ``audio`` so its length is a whole number of int16 samples.

    Args:
        audio: Raw audio bytes.

    Returns:
        ``audio`` itself when its length is already a multiple of the int16
        item size, otherwise ``audio`` with trailing NUL bytes appended up to
        the next multiple.
    """
    sample_width = np.dtype(np.int16).itemsize
    remainder = len(audio) % sample_width
    if remainder == 0:
        # Already aligned: hand the buffer back untouched.
        return audio
    return audio + b'\0' * (sample_width - remainder)
def generate_voice(text, voice_name):
    """Synthesize speech for ``text`` using the selected ElevenLabs voice.

    Args:
        text: Text to speak. Only the first 250 characters are sent.
        voice_name: Voice identifier forwarded to ``generate`` as ``voice``.

    Returns:
        A ``(44100, samples)`` tuple where ``samples`` is a NumPy ``int16``
        view over the (padded) bytes returned by ``generate``.

    Raises:
        gr.Error: If ``text`` is empty, if the unauthenticated rate limit is
            hit, or if any other error occurs during generation.
    """
    # Reject empty / whitespace-only / missing input up front with a clear
    # message instead of sending a pointless request.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to generate speech.")

    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model="eleven_multilingual_v2",
        )
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string, not an exception object; fall
        # back to repr() when the exception carries no message.
        raise gr.Error(str(e) or repr(e)) from e
# Markdown blurb rendered in the UI via gr.Markdown(description).
# NOTE(review): the globe emoji before "Sign up" could only be partially
# recovered from the garbled text; U+1F30D is assumed here - confirm.
description = """
Here's a demonstration of the world's most advanced TTS systems, created by ElevenLabs. 🎉 I wanted to experiment with this amazing technology for the Italian language 🇮🇹, and I'm excited to share its capabilities with you! Eleven Multilingual V2 is a single foundational model supporting an impressive 28 languages, including English 🇬🇧, Chinese 🇨🇳, Spanish 🇪🇸, Hindi 🇮🇳, Portuguese 🇵🇹, French 🇫🇷, German 🇩🇪, Japanese 🇯🇵, Arabic 🇸🇦, Korean 🇰🇷, Indonesian 🇮🇩, Italian 🇮🇹, Dutch 🇳🇱, Turkish 🇹🇷, Polish 🇵🇱, Swedish 🇸🇪, Filipino 🇵🇭, Malay 🇲🇾, Romanian 🇷🇴, Ukrainian 🇺🇦, Greek 🇬🇷, Czech 🇨🇿, Danish 🇩🇰, Finnish 🇫🇮, Bulgarian 🇧🇬, Croatian 🇭🇷, Slovak 🇸🇰, and Tamil 🇱🇰. 🌍 Sign up on ElevenLabs to get fast access to long-form generation, voice cloning, API keys, and more! 🚀
"""
# Build the Gradio UI: logo, description, a text box and voice selector as
# inputs, a button that runs generate_voice, and an audio player for the result.
with gr.Blocks() as block:
    gr.Markdown('[ ![ElevenLabs](https://user-images.githubusercontent.com/12028621/262629275-4f85c9cf-85b6-435e-ab50-5b8c7c4e9dd2.png) ](https://elevenlabs.io)')
    gr.Markdown(description)

    input_text = gr.Textbox(
        label="Input Text (250 characters max)",
        lines=2,
        value="Ciao, mi chiamo Sab, sono un ragazzo italiano appassionato di AI e nuove tecnologie!",
        elem_id="input_text",
    )

    # The voice list is fetched once at startup.
    all_voices = voices()
    voice_names = [voice.name for voice in all_voices]
    # "Callum" is the preferred default, but it is not guaranteed to be in the
    # fetched list; fall back to the first available voice (or no
    # preselection) so the dropdown never starts with a value outside its
    # choices.
    if "Callum" in voice_names:
        default_voice = "Callum"
    else:
        default_voice = voice_names[0] if voice_names else None

    input_voice = gr.Dropdown(
        voice_names,
        value=default_voice,
        label="Voice",
        elem_id="input_voice",
    )

    run_button = gr.Button(
        value="Generate Voice",
    )

    out_audio = gr.Audio(
        label="Generated Voice",
        type="numpy",
        elem_id="out_audio",
        format="mp3",
    )

    inputs = [input_text, input_voice]
    outputs = [out_audio]

    # Clicking the button sends (text, voice) to generate_voice and routes its
    # return value to the audio component.
    run_button.click(
        fn=generate_voice,
        inputs=inputs,
        outputs=outputs,
        queue=True,
    )

block.launch(debug=False, show_error=True, share=True)