Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from parler_tts import ParlerTTSForConditionalGeneration
|
4 |
+
from transformers import AutoTokenizer
|
5 |
+
import soundfile as sf
|
6 |
+
|
7 |
+
# Set up the device
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Checkpoint shared by the model and its tokenizer
MODEL_ID = "parler-tts/parler-tts-large-v1"


@st.cache_resource
def _load_model_and_tokenizer(model_id: str, target_device: str):
    """Load the Parler-TTS model and its tokenizer once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the checkpoint would be re-instantiated
    (and moved to the device again) on each rerun.

    Args:
        model_id: Hub identifier of the checkpoint to load.
        target_device: Torch device string the model is moved to.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    tts_model = ParlerTTSForConditionalGeneration.from_pretrained(model_id).to(target_device)
    tts_tokenizer = AutoTokenizer.from_pretrained(model_id)
    return tts_model, tts_tokenizer


# Load the model and tokenizer
model, tokenizer = _load_model_and_tokenizer(MODEL_ID, device)
|
13 |
+
|
14 |
+
# Neon-themed styling: inject a raw <style> block (requires unsafe_allow_html)
st.markdown("""
<style>
body {
    background-color: #0f0f0f;
    color: #0fff0f;
}
.stTextInput, .stTextArea {
    background-color: #333333;
    color: #0fff0f;
}
.stButton > button {
    background-color: #0fff0f;
    color: #0f0f0f;
}
</style>
""", unsafe_allow_html=True)

# Page heading
st.title("🎤 Neon TTS Converter")
|
33 |
+
|
34 |
+
# Predefined voice options: display name -> natural-language speaker
# description that is tokenized and passed to the model as `input_ids`.
voices = {
    "Smooth Female": "A female speaker delivers a slightly expressive and animated speech with a moderate speed and pitch.",
    "Monotone Male": "Jon's voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.",
    "Energetic Youth": "An energetic young speaker with a lively tone and rapid speech, creating a sense of excitement.",
    "Calm Elderly": "An elderly speaker with a calm and slow-paced voice, bringing wisdom and serenity to the speech.",
    "Robotic": "A robotic, artificial voice with a consistent pitch and no variation in tone.",
    "Narrator": "A deep and clear voice, with a strong presence and a slightly slower pace, suitable for narrations.",
    "Whisper": "A soft, whispered voice, with very low volume and an intimate tone.",
    "Formal": "A formal, authoritative voice with clear articulation and a steady pace.",
    "Happy": "A cheerful, upbeat voice with a positive tone and lively intonation.",
    "Mysterious": "A mysterious and low-pitched voice, with slow delivery and a sense of intrigue."
}
|
47 |
+
|
48 |
+
# Sidebar for voice selection
st.sidebar.header("Select Voice")
# Iterating a dict yields its keys, so the options are the voice names
voice_choice = st.sidebar.selectbox("Choose a Voice", list(voices))

# Display the selected voice description
st.sidebar.markdown(f"**Description:** {voices[voice_choice]}")

# Input for custom prompt (the text that will be spoken)
st.sidebar.header("Custom Prompt")
prompt = st.sidebar.text_area("Enter your custom prompt", value="Hey, how are you doing today?")
|
58 |
+
|
59 |
+
# Error handling: this is the top-level UI boundary, so any failure during
# tokenization, generation or file writing is reported in the page instead
# of crashing the script run.
try:
    # Generate the TTS output
    if st.sidebar.button("Generate Speech"):
        if not prompt.strip():
            # Nothing to synthesize; skip the expensive generate() call.
            st.warning("Please enter some text before generating speech.")
        else:
            # The voice description conditions the speaker style; the
            # prompt is the text that is actually spoken.
            description = voices[voice_choice]
            input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
            prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

            # Generation can take a while, so show progress feedback.
            with st.spinner("Generating speech..."):
                generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
            audio_arr = generation.cpu().numpy().squeeze()

            # Save the audio file
            # NOTE(review): fixed path is shared by every session of the app;
            # concurrent users may overwrite each other's output — confirm
            # whether per-session files are needed.
            output_file = "parler_tts_out.wav"
            sf.write(output_file, audio_arr, model.config.sampling_rate)

            # Display the audio player
            st.audio(output_file)
            st.success("Speech generation complete!")
except Exception as e:
    st.error(f"An error occurred: {e}")
|