Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from kokoro import KPipeline
|
3 |
+
import soundfile as sf
|
4 |
+
import io
|
5 |
+
|
6 |
+
# Streamlit App UI Setup
st.title("Text-to-Speech with Kokoro")
st.sidebar.header("Configuration & Instructions")

# Sidebar instructions, rendered as Markdown. Sub-bullets are indented so that
# they nest under their numbered step, and the language flags are real Unicode
# regional-indicator emoji (they were previously mis-encoded).
st.sidebar.markdown("""
### How to Use the Text-to-Speech App:

1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.

2. **Select Language**:
    - Choose the language of the text you are entering. Available options include:
        - 🇺🇸 American English (`a`)
        - 🇬🇧 British English (`b`)
        - 🇪🇸 Spanish (`e`)
        - 🇫🇷 French (`f`)
        - 🇮🇳 Hindi (`h`)
        - 🇮🇹 Italian (`i`)
        - 🇧🇷 Brazilian Portuguese (`p`)
        - 🇯🇵 Japanese (`j`)
        - 🇨🇳 Mandarin Chinese (`z`)

3. **Select Voice**:
    - Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.

4. **Adjust Speed**:
    - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.

5. **Generate Speech**:
    - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.

6. **Download**:
    - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.

Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
""")
|
42 |
+
|
43 |
+
# User input for text, language, and voice settings
input_text = st.text_area("Enter your text here", "The sky above the port was the color of television...")
lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'j', 'z'])
# NOTE(review): every voice below uses the `af_` prefix regardless of the
# selected language -- confirm these names exist in the deployed Kokoro model
# and whether the list should depend on `lang_code`.
voice = st.selectbox("Select Voice", ['af_heart', 'af_joy', 'af_female', 'af_male'])  # Change voice options as per model
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


@st.cache_resource
def _load_pipeline(lang_code):
    """Return the Kokoro pipeline for ``lang_code``, building it only once.

    Streamlit re-executes this whole script on every widget interaction, so
    constructing ``KPipeline`` directly at module level would rebuild it on
    each rerun. ``st.cache_resource`` keeps one instance per language code
    and returns it on subsequent reruns.
    """
    return KPipeline(lang_code=lang_code)


# Initialize the TTS pipeline with user-selected language
pipeline = _load_pipeline(lang_code)
|
51 |
+
|
52 |
+
# Generate Audio function
|
53 |
+
def generate_audio(text, lang_code, voice, speed, sample_rate=24000):
    """Synthesize ``text`` and return the speech as an in-memory WAV file.

    Every chunk yielded by the pipeline is appended to the same WAV stream,
    so multi-line input produces the complete audio rather than only the
    last chunk.

    Args:
        text: Text to convert to speech. The pipeline splits it on runs of
            newlines (``split_pattern=r'\\n+'``).
        lang_code: Unused here; the module-level ``pipeline`` was already
            built for the selected language. Kept for interface
            compatibility.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.
        sample_rate: Sample rate written to the WAV header. Defaults to
            24000, the value this app has always used.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing WAV data.

    Raises:
        ValueError: If the pipeline yields no audio (e.g. empty text).
    """
    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    buffer = io.BytesIO()
    chunks_written = 0
    # A BytesIO has no file name, so soundfile cannot infer the container
    # from an extension; the format must be given explicitly when writing.
    # channels=1 assumes each chunk is mono (1-D) audio -- TODO confirm.
    with sf.SoundFile(buffer, mode='w', samplerate=sample_rate,
                      channels=1, format='WAV') as wav_file:
        for _graphemes, _phonemes, audio in generator:
            if audio is None:
                # Nothing to write for this chunk.
                continue
            wav_file.write(audio)
            chunks_written += 1

    if chunks_written == 0:
        raise ValueError("No audio was generated for the given text.")

    buffer.seek(0)
    return buffer
|
62 |
+
|
63 |
+
# Generate and display the audio file
if st.button('Generate Audio'):
    if not input_text.strip():
        # Nothing to synthesize; avoid calling the model with empty text.
        st.warning("Please enter some text to convert to speech.")
    else:
        # Show the status only while synthesis is running.
        with st.spinner("Generating speech..."):
            audio_buffer = generate_audio(input_text, lang_code, voice, speed)
        # Persist the bytes across reruns: clicking the download button
        # re-executes the script with st.button() returning False, which
        # would otherwise make the player and download button disappear.
        st.session_state["generated_audio_wav"] = audio_buffer.getvalue()

generated_audio = st.session_state.get("generated_audio_wav")
if generated_audio:
    # Display Audio player in the app
    st.audio(generated_audio, format='audio/wav')

    # Optional: Save the generated audio file for download
    st.download_button(
        label="Download Audio",
        data=generated_audio,
        file_name="generated_speech.wav",
        mime="audio/wav",
    )
|