File size: 3,975 Bytes
3b1f0f3
 
 
 
 
5bd7020
 
 
 
 
 
 
 
770a448
3b1f0f3
8a1ba2b
4306d26
8a1ba2b
770a448
8a1ba2b
8aa19f9
3b1f0f3
 
 
 
8aa19f9
3b1f0f3
319ac54
0584b7c
 
 
ea2b509
a005c5c
 
 
 
 
 
 
6e98ca7
a005c5c
 
 
 
 
 
6e98ca7
a005c5c
 
 
 
6e98ca7
a005c5c
 
 
 
 
 
6e98ca7
a005c5c
6e98ca7
a005c5c
6e98ca7
a005c5c
 
 
 
ea2b509
 
 
6e98ca7
a005c5c
6e98ca7
 
 
 
a005c5c
a1c99ef
6e98ca7
a1c99ef
3b1f0f3
cbbd8a5
 
 
 
 
84bfa6f
 
0584b7c
84bfa6f
 
3b1f0f3
 
4306d26
3b1f0f3
dce5f84
3b1f0f3
 
 
 
 
 
770a448
3b1f0f3
 
 
 
 
 
 
 
 
 
 
 
0ad3caf
 
3b1f0f3
 
770a448
3b1f0f3
 
 
 
 
 
 
 
 
dce5f84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import random 
import gradio as gr 
import numpy as np 
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError

def pad_buffer(audio):
    """Return ``audio`` zero-padded so its length is a multiple of the int16 size.

    The buffer is returned unchanged when its length already divides evenly
    by the item size of ``np.int16``; otherwise NUL bytes are appended to
    reach the next multiple.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        return audio
    # Append just enough NUL bytes to complete the last sample.
    return audio + b'\0' * (sample_width - leftover)

def generate_voice(text, voice_name):
    """Synthesize speech for ``text`` using the named ElevenLabs voice.

    Args:
        text: Text to speak. Only the first 250 characters are sent.
        voice_name: Voice name forwarded to ``generate``.

    Returns:
        A ``(sample_rate, samples)`` tuple: the fixed rate ``44100`` and the
        returned bytes (padded to an even length) viewed as an int16 array.

    Raises:
        gr.Error: If the unauthenticated rate limit is hit, or if any other
            exception occurs while generating or decoding the audio.
    """
    try:
        audio = generate(
            text[:250],  # Limit to 250 characters
            voice=voice_name,
            model="eleven_multilingual_v2",
        )
        # NOTE(review): this treats the returned bytes as raw 16-bit PCM at
        # 44.1 kHz -- confirm against the output format `generate` produces.
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string, not an exception object.
        raise gr.Error(str(e)) from e
    

# Markdown for a badge row; currently contains only blank lines.
badges = """

"""
# HTML/CSS snippet for the top menu bar: a round emoji icon on the left and
# four placeholder links (all pointing at "#") on the right.
menu = """
<style>
    .menu-bar {
        display: flex;
        justify-content: space-between;
        align-items: center;
        background-color: #333;
        padding: 10px;
        color: white;
        font-family: sans-serif;  /* Use sans-serif font */
    }
    .menu-icon {
        font-size: 24px;
        background-color: #ffffff;
        border-radius: 50%;
        padding: 5px;
        text-decoration: none;  /* Remove underline from emoji */
    }
    .menu-items {
        display: flex;
        gap: 15px;
        color: white;
    }
    .menu-item {
        padding: 8px 16px;
        background-color: #555;
        border-radius: 4px;
        transition: background-color 0.3s;
        font-weight: bold;  /* Make text bold */
    }
    .menu-item a, .menu-icon a {
        color: white;
        text-decoration: none;  /* Remove underline */
    }
    .menu-item:hover {
        background-color: #777;
    }
</style>

<div class="menu-bar">
    <a href="#" class="menu-icon">🎵</a>  <!-- Make emoji clickable -->
    <div class="menu-items">
        <span class="menu-item"><a href="#">Dashboard</a></span>
        <span class="menu-item"><a href="#">Premium Apps</a></span>
        <span class="menu-item"><a href="#">My Account</a></span>
        <span class="menu-item"><a href="#">Train Voice</a></span>
    </div>
</div>

"""

# Markdown intro text for the page: lists the supported languages and links
# to the ElevenLabs sign-up page.
description = """
Welcome to AI Tutor Text to Voice, single foundational model supporting 28 languages including: English, Chinese, Spanish, Hindi, Portuguese, French, German, Japanese, Arabic, Korean, Indonesian, Italian, Dutch, Turkish, Polish, Swedish, Filipino, Malay, Romanian, Ukrainian, Greek, Czech, Danish, Finnish, Bulgarian, Croatian, Slovak, and Tamil. Sign up on [ElevenLabs](https://elevenlabs.io) to get fast access, long-form generation, voice cloning, API keys, and more!
"""


# Build the Gradio UI: header markdown, a text box and voice dropdown as
# inputs, a button that runs generate_voice, and an audio player for the result.
with gr.Blocks() as block:
    gr.Markdown('')
    gr.Markdown(menu)  # Added the menu here
    gr.Markdown(badges)
    gr.Markdown(description)

    input_text = gr.Textbox(
        label="Input Text (250 characters max)",
        lines=2,
        value="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! Γειά σας! Здравей! வணக்கம்!",
        elem_id="input_text"
    )

    # The voice list is fetched once, while the UI is being constructed.
    all_voices = voices()
    input_voice = gr.Dropdown(
        [voice.name for voice in all_voices],
        value="Bella",
        label="Voice",
        elem_id="input_voice"
    )

    # The button caption is set through `value`; `text`/`type` are not
    # Button parameters, so the intended label was never applied.
    run_button = gr.Button(
        value="Generate Voice"
    )

    out_audio = gr.Audio(
        label="Generated Voice",
        type="numpy",
        elem_id="out_audio",
        format="mp3"
    )

    inputs = [input_text, input_voice]
    outputs = [out_audio]

    # Clicking the button sends (text, voice name) to generate_voice and
    # routes its return value to the audio component.
    run_button.click(
        fn=generate_voice,
        inputs=inputs,
        outputs=outputs,
        queue=True
    )

# Enable the request queue and start the app server.
block.queue(concurrency_count=5).launch(debug=True)