Xylaria-TTS

Running

App Files Files Community

Xylaria-TTS / app.py

Reality123b

Update app.py

b95bbb4 verified 11 days ago

raw

history blame contribute delete

5.3 kB

	import streamlit as st
	import openai
	from kokoro import KPipeline
	import soundfile as sf
	import io
	import time

	# Streamlit App UI Setup
	st.title("Text-to-Speech Translator with Kokoro")

	# Collapsible section holding a sample prompt
	with st.expander("Sample Prompt!"):
	    st.markdown("""
	hi
	""")

	# Sidebar placeholders (currently rendered empty)
	st.sidebar.markdown("""
	""")

	st.sidebar.header("")

	st.sidebar.markdown("""

	""")

	# User input for text, language, and voice settings
	input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
	lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j'])
	voice = st.selectbox("Select Voice", [
	    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
	    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa',
	    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
	    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
	    'ef_dora',
	    'em_alex', 'em_santa',
	    'ff_siwis',
	    'hf_alpha', 'hf_beta',
	    'hm_omega', 'hm_psi',
	    'if_sara',
	    'im_nicola',
	    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
	    'jm_kumo',
	    'pf_dora',
	    'pm_alex', 'pm_santa',
	    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
	    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
	])  # Change voice options as per model
	speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


	# Streamlit re-executes this whole script on every widget interaction, so an
	# uncached KPipeline(...) call would rebuild the TTS pipeline each time the
	# user moves a slider or types a character. Cache it instead. max_entries=1
	# keeps at most one pipeline alive, so memory use is no higher than before.
	# NOTE(review): the cached object is shared across sessions - confirm that
	# KPipeline is safe to call concurrently if the app serves multiple users.
	@st.cache_resource(max_entries=1)
	def _load_pipeline(code):
	    """Build (or reuse the cached) Kokoro pipeline for language ``code``."""
	    return KPipeline(lang_code=code)


	# Initialize the TTS pipeline with user-selected language
	pipeline = _load_pipeline(lang_code)

	# Text input for the OpenAI API key (optional; only needed for translation)
	openai_api_key = st.text_input("Enter your OpenAI API Key (Optional for Translation)", type="password")

	# Function to translate text to English using OpenAI's Chat API
	def translate_to_english(api_key, text, lang_code):
	    """Translate ``text`` into English with OpenAI's chat completions API.

	    Args:
	        api_key: OpenAI API key used for this request.
	        text: The text to translate.
	        lang_code: Kokoro single-letter language code of ``text`` (for
	            example ``'e'`` for Spanish). Unknown codes are passed through
	            to the prompt unchanged.

	    Returns:
	        The English translation. ``text`` is returned unchanged when it is
	        blank, when the request fails (the error is shown with
	        ``st.error``), or when the model returns an empty reply.
	    """
	    # Nothing to translate: skip the (billable) API round trip.
	    if not text or not text.strip():
	        return text

	    # The UI hands us Kokoro's one-letter codes; "from e to English" is
	    # meaningless to the model, so spell the language out.
	    language_names = {
	        'a': "American English",
	        'b': "British English",
	        'e': "Spanish",
	        'f': "French",
	        'h': "Hindi",
	        'i': "Italian",
	        'j': "Japanese",
	        'p': "Brazilian Portuguese",
	        'z': "Mandarin Chinese",
	    }
	    source_language = language_names.get(lang_code, lang_code)
	    prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant that translates text."},
	        {"role": "user", "content": prompt},
	    ]

	    try:
	        if hasattr(openai, "OpenAI"):
	            # openai>=1.0: ChatCompletion.create was removed. A per-call
	            # client also avoids storing one user's key in a module global
	            # that every session of this server process shares.
	            client = openai.OpenAI(api_key=api_key)
	            response = client.chat.completions.create(model="gpt-4", messages=messages)
	            content = response.choices[0].message.content
	        else:
	            # Legacy openai<1.0 interface.
	            openai.api_key = api_key
	            response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
	            content = response['choices'][0]['message']['content']
	    except Exception as e:
	        st.error(f"Error occurred during translation: {e}")
	        return text  # Fallback to original text in case of an error

	    translated_text = (content or "").strip()

	    # Drop a leading "The translated text ...:" preamble, but only when a
	    # colon is actually present and something follows it; otherwise keep the
	    # reply as-is instead of failing and discarding a valid translation.
	    if translated_text.lower().startswith("the translated text"):
	        _, colon, remainder = translated_text.partition(":")
	        if colon and remainder.strip():
	            translated_text = remainder.strip()

	    return translated_text or text

	# Generate Audio function
	def generate_audio(text, lang_code, voice, speed):
	    """Synthesize ``text`` with Kokoro and return it as an in-memory WAV file.

	    Args:
	        text: Text to speak. It is split into segments on runs of newlines.
	        lang_code: Kokoro language code to synthesize with. When it differs
	            from the module-level ``pipeline``'s language, a pipeline for
	            this language is created for the call.
	        voice: Kokoro voice name (for example ``'af_heart'``).
	        speed: Speech speed multiplier passed to the pipeline.

	    Returns:
	        An ``io.BytesIO`` positioned at offset 0 containing 24 kHz WAV data
	        for all segments, in order.

	    Raises:
	        ValueError: If ``text`` is blank or the pipeline yields no audio.
	    """
	    if not text or not text.strip():
	        raise ValueError("Cannot generate audio from empty text.")

	    import numpy as np  # local import: numpy is not imported at file level

	    # Honor lang_code instead of always using the globally selected language
	    # (matters for the translated-English pass). Reuse the already loaded
	    # model, if any, so only the language front end is rebuilt.
	    tts = pipeline
	    if getattr(pipeline, "lang_code", lang_code) != lang_code:
	        tts = KPipeline(lang_code=lang_code, model=getattr(pipeline, "model", None) or True)

	    # Collect every segment; keeping only the last one would drop all but
	    # the final paragraph of multi-paragraph input.
	    segments = []
	    for _graphemes, _phonemes, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+'):
	        if audio is None:
	            continue
	        if hasattr(audio, "detach"):
	            # Tensor-like output: move to host memory before converting.
	            audio = audio.detach().cpu().numpy()
	        segments.append(np.asarray(audio))

	    if not segments:
	        raise ValueError("The TTS pipeline produced no audio for the given text.")

	    # Save audio to an in-memory buffer; the format must be given explicitly
	    # because a BytesIO has no file extension to infer it from.
	    buffer = io.BytesIO()
	    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
	    buffer.seek(0)
	    return buffer

	# Generate and display the audio file
	if st.button('Generate Audio'):
	    if not input_text.strip():
	        # Nothing to synthesize; tell the user instead of failing inside TTS.
	        st.warning("Please enter some text before generating audio.")
	    else:
	        # Keep the spinner up while the real work runs. The earlier version
	        # slept through a fake percentage loop (the st.spinner objects made
	        # inside it were never entered, so nothing was shown) and only then
	        # started synthesis with no progress indicator at all.
	        with st.spinner("Generating audio..."):
	            st.write("Generating speech for the original text...")
	            audio_buffer = generate_audio(input_text, lang_code, voice, speed)

	        # Display Audio player for the original language
	        st.audio(audio_buffer, format='audio/wav')

	        # Offer the generated audio file for download (Original Text)
	        st.download_button(
	            label="Download Audio (Original Text)",
	            data=audio_buffer,
	            file_name="generated_speech_original.wav",
	            mime="audio/wav",
	        )

	        # Translation and English audio are only produced when an OpenAI API
	        # key has been provided.
	        if openai_api_key:
	            with st.spinner("Translating and generating English audio..."):
	                # Translate the input text to English using OpenAI
	                translated_text = translate_to_english(openai_api_key, input_text, lang_code)

	                # Generate audio for the translated English text
	                translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

	            # Display Audio for the translated text
	            st.write(f"Translated Text: {translated_text}")
	            st.audio(translated_audio_buffer, format='audio/wav')

	            # Offer the generated audio file for download (Translated Text)
	            st.download_button(
	                label="Download Audio (Translated to English)",
	                data=translated_audio_buffer,
	                file_name="generated_speech_translated.wav",
	                mime="audio/wav",
	            )