# Spaces: Running
import io
import time

import numpy as np
import openai
import soundfile as sf
import streamlit as st
from kokoro import KPipeline
# Streamlit App UI Setup
st.title("Text-to-Speech Translator with Kokoro")

# Expander section showing a sample prompt
with st.expander("Sample Prompt!"):
    st.markdown("""
hi
""")

st.sidebar.markdown("""
""")
st.sidebar.header("")
st.sidebar.markdown("""
""")

# Human-readable labels for the Kokoro language codes offered below.
# The selectbox still returns the one-letter code; the label is display-only.
LANGUAGE_LABELS = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
    'j': "Japanese",
}

# Voice identifiers offered in the UI. Change voice options as per model.
# NOTE(review): the first letter of each voice appears to mirror the language
# code above (e.g. 'jf_*' for 'j'); the list is not filtered by language, so a
# mismatched language/voice pair can be selected -- confirm this is intended.
VOICE_OPTIONS = [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
]

# User input for text, language, and voice settings
input_text = st.text_area(
    "Enter your text here",
    placeholder="The sky above the port was the color of television...",
)
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_LABELS),
    format_func=lambda code: f"{LANGUAGE_LABELS[code]} ({code})",
)
voice = st.selectbox("Select Voice", VOICE_OPTIONS)
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


@st.cache_resource
def _load_pipeline(code):
    """Return a KPipeline for *code*, built once and reused across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the pipeline would be reconstructed each time.
    """
    return KPipeline(lang_code=code)


# Initialize the TTS pipeline with the user-selected language
pipeline = _load_pipeline(lang_code)

# Optional OpenAI API key; translation is skipped when it is left empty
openai_api_key = st.text_input("Enter your OpenAI API Key (Optional for Translation)", type="password")
# Kokoro language codes mapped to the language names used in the translation
# prompt, so the model is told "Spanish" rather than the opaque code "e".
_PROMPT_LANGUAGE_NAMES = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'j': "Japanese",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
}


def _strip_translation_preamble(reply):
    """Remove a leading "The translated text ...:" label from *reply*, if any."""
    if reply.lower().startswith("the translated text"):
        # partition() never raises when the colon is missing, unlike split()[1].
        _label, colon, remainder = reply.partition(":")
        if colon and remainder.strip():
            return remainder.strip()
    return reply


# Function to translate text to English using OpenAI's Chat API
def translate_to_english(api_key, text, lang_code):
    """Translate *text* to English with the OpenAI chat completion API.

    Args:
        api_key: OpenAI API key used for the request.
        text: Text to translate.
        lang_code: Source language; a known Kokoro code is expanded to its
            language name, any other value is placed in the prompt as given.

    Returns:
        The translated text. Empty or whitespace-only *text* is returned
        unchanged without calling the API. If the request fails, the error is
        shown via ``st.error`` and the original *text* is returned.
    """
    if not text or not text.strip():
        return text

    source_language = _PROMPT_LANGUAGE_NAMES.get(lang_code, lang_code)
    openai.api_key = api_key
    try:
        # Construct the prompt for translation
        prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
        # NOTE(review): openai.ChatCompletion is the pre-1.0 client interface;
        # confirm the pinned openai version still provides it.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that translates text."},
                {"role": "user", "content": prompt},
            ],
        )
        translated_text = response['choices'][0]['message']['content'].strip()
        return _strip_translation_preamble(translated_text)
    except Exception as e:
        # Best-effort by design: surface the problem in the UI and keep going.
        st.error(f"Error occurred during translation: {e}")
        return text  # Fallback to original text in case of an error
# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesize *text* and return it as an in-memory WAV file.

    Args:
        text: Text to speak; it is split into segments on blank lines/newlines.
        lang_code: Language code the pipeline must be configured for.
        voice: Voice identifier passed to the pipeline.
        speed: Speed value passed to the pipeline.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing WAV data written
        with a 24000 Hz sample rate.

    Raises:
        ValueError: If *text* is empty or the pipeline yields no audio.
    """
    if not text or not text.strip():
        raise ValueError("Cannot generate audio from empty text.")

    # Reuse the module-level pipeline when it already targets lang_code;
    # otherwise build one for the requested language so the argument is honored.
    if getattr(pipeline, "lang_code", None) == lang_code:
        tts = pipeline
    else:
        tts = KPipeline(lang_code=lang_code)

    # The pipeline yields one result per text segment; keep all of them rather
    # than only the last so multi-paragraph input is fully voiced.
    segments = [
        np.asarray(audio)
        for _graphemes, _phonemes, audio in tts(
            text, voice=voice, speed=speed, split_pattern=r'\n+'
        )
        if audio is not None
    ]
    if not segments:
        raise ValueError("No audio was generated for the given text.")

    # Save audio to an in-memory buffer; the format must be given explicitly
    # because a BytesIO object has no file extension to infer it from.
    buffer = io.BytesIO()
    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
    buffer.seek(0)
    return buffer
# Generate and display the audio file
if st.button('Generate Audio'):
    if not input_text.strip():
        # Nothing to synthesize; avoid calling the TTS pipeline with empty text.
        st.warning("Please enter some text before generating audio.")
    else:
        st.write("Generating speech for the original text...")
        # Keep the spinner visible for the duration of the real work instead
        # of a simulated progress loop.
        with st.spinner("Generating audio..."):
            audio_buffer = generate_audio(input_text, lang_code, voice, speed)

        # Display Audio player for the original language
        st.audio(audio_buffer, format='audio/wav')

        # Optional: Save the generated audio file for download (Original Text)
        st.download_button(
            label="Download Audio (Original Text)",
            data=audio_buffer,
            file_name="generated_speech_original.wav",
            mime="audio/wav",
        )

        # Translation and English audio are produced only when a key is given
        if openai_api_key:
            with st.spinner("Translating and generating English audio..."):
                # Translate the input text to English using OpenAI
                translated_text = translate_to_english(openai_api_key, input_text, lang_code)
                # 'a' requests English synthesis; the voice is the one chosen
                # by the user for the original language.
                translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

            # Display Audio for the translated text
            st.write(f"Translated Text: {translated_text}")
            st.audio(translated_audio_buffer, format='audio/wav')

            # Optional: Save the generated audio file for download (Translated Text)
            st.download_button(
                label="Download Audio (Translated to English)",
                data=translated_audio_buffer,
                file_name="generated_speech_translated.wav",
                mime="audio/wav",
            )