File size: 1,631 Bytes
5f06acc
 
 
 
 
 
 
 
 
 
 
 
 
03f2172
 
5f06acc
 
03f2172
5f06acc
 
 
 
 
 
 
03f2172
5f06acc
562ed19
 
 
 
 
 
5f06acc
562ed19
 
 
 
 
 
 
5f06acc
562ed19
 
 
 
 
5f06acc
562ed19
 
 
 
 
 
 
 
5f06acc
562ed19
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import random 
import gradio as gr 
import numpy as np 
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError

def pad_buffer(audio):
    """Return *audio* zero-padded so its length is a whole number of int16 samples.

    The input is returned unchanged when its length is already a multiple of
    the int16 item size; otherwise NUL bytes are appended to reach the next
    multiple.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        # Already aligned: nothing to append.
        return audio
    missing = sample_width - leftover
    return audio + b'\0' * missing

def generate_voice(text, voice_name):
    """Synthesize speech for *text* using the named ElevenLabs voice.

    Only the first 250 characters of *text* are sent to ``generate``.

    Args:
        text: The text to speak.
        voice_name: Voice name, forwarded as ``voice=`` to ``generate``.

    Returns:
        A ``(sample_rate, samples)`` tuple: the fixed rate ``44100`` and the
        returned audio bytes viewed as a 1-D ``int16`` NumPy array.

    Raises:
        gr.Error: If the unauthenticated rate limit is reached, or if any
            other exception occurs while generating or decoding the audio.
            The original exception is chained as the cause.
    """
    model_name = "eleven_multilingual_v1"
    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model=model_name,
        )
        # Pad first: np.frombuffer rejects a buffer whose length is not a
        # multiple of the element size.
        # NOTE(review): the bytes are interpreted as raw 16-bit samples at
        # 44.1 kHz -- confirm this matches the format `generate` returns.
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        # Chain the cause so the underlying rate-limit error stays in the traceback.
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # UI boundary: surface any other failure to the user as a Gradio error,
        # keeping the original exception as the explicit cause.
        raise gr.Error(str(e)) from e

# --- Gradio UI wiring -------------------------------------------------------

# Two-line text input, pre-filled with a sample sentence. The label advertises
# the 250-character cap that generate_voice applies by slicing the text.
input_text = gr.Textbox(
    label="Input Text (250 characters max)", 
    lines=2, 
    value="Hahaha OHH MY GOD! This is SOOO funny, I-I am Eleven a text-to-speech system!",
    elem_id="input_text"
)

# Voice list is fetched once, at module import time.
# NOTE(review): presumably a network call to ElevenLabs -- confirm, and note
# that a failure here would prevent the app from starting.
all_voices = voices() 
# Dropdown offering every returned voice by name.
# NOTE(review): the default assumes a voice named "Arnold" is in the list -- confirm.
input_voice = gr.Dropdown(
    [voice.name for voice in all_voices], 
    value="Arnold",
    label="Voice", 
    elem_id="input_voice"
)

# Audio output; type="numpy" pairs with the (sample_rate, ndarray) tuple that
# generate_voice returns.
out_audio = gr.Audio(
    label="Generated Voice",
    type="numpy", 
    elem_id="out_audio"
)

# Bind generate_voice to the text and voice inputs and the audio output.
# NOTE(review): per Gradio's Interface docs, live=True re-runs fn whenever an
# input changes, so each edit may trigger a generate() call -- confirm this is
# intended. concurrency_count is presumably the number of queue workers and
# may not be accepted by newer Gradio releases -- verify against the pinned
# version.
iface = gr.Interface(
    fn=generate_voice,
    inputs=[input_text, input_voice],
    outputs=out_audio,
    live=True,
    theme="Monochrome",
    concurrency_count=1
)

# Start the app; this runs unconditionally whenever the module is executed
# or imported.
iface.launch(debug=True)