Xylaria-TTS

Running

App Files Files Community

Xylaria-TTS / app.py

shukdevdatta123

Update app.py

f15a385 verified 27 days ago

raw

history blame

6.37 kB

	import streamlit as st
	import openai
	from kokoro import KPipeline
	import soundfile as sf
	import io

	# Markdown body of the "Sample Prompt!" expander: the same sentence written
	# in each of the languages a user might try.
	SAMPLE_PROMPTS_MD = """
	- My name is Shukdev. (In English)
	- Mi nombre es Shukdev. (In Spanish)
	- Je m'appelle Choukdev. (In French)
	- मेरा नाम शुकदेव है. (In Hindi)
	- Il mio nome è Shukdev. (In Italy)
	- Meu nome é Sukhdev. (In Portuguese, Brazil)
	- 我叫苏赫德夫。(In Chinese)
	- 私の名前はスクデフです。(In Japanese)
	"""

	# Page title, then a collapsible panel holding the sample prompts.
	st.title("Text-to-Speech Translator with Kokoro")
	with st.expander("Sample Prompt!"):
	    st.markdown(SAMPLE_PROMPTS_MD)

	# Usage guide rendered as markdown in the sidebar.
	SIDEBAR_GUIDE_MD = """
	### How to Use the Text-to-Speech App:
	1. Enter Text:
	- Type or paste the text you want to convert to speech in the main text area.

	2. Select Language:
	- Choose the language of the input text. The available language options include:
	- 🇺🇸 English (American English)
	- 🇬🇧 British English
	- 🇪🇸 Spanish
	- 🇫🇷 French
	- 🇮🇳 Hindi
	- 🇮🇹 Italian
	- 🇧🇷 Portuguese (Brazilian)
	- 🇨🇳 Chinese (Mandarin)
	- 🇯🇵 Japanese

	3. Select Voice:
	- Choose the voice you want for the speech. There are multiple voice styles based on tone and gender (e.g., `af_heart`, `af_joy`, etc.).

	4. Adjust Speech Speed:
	- Use the slider to adjust how fast the speech will be generated. The speed can be set from `0.5x` to `2.0x`, with `1.0x` being the default normal speed.

	5. Generate Speech:
	- Once you've selected the text, language, voice, and speed, click the "Generate Audio" button. The app will process the text and generate the speech.

	6. Download Audio:
	- After the audio is generated, you can play it directly within the app or download it as a `.wav` file by clicking the "Download Audio" button.

	### Additional Features:
	- Text Translation:
	- The app can automatically translate the text to English before generating audio. After the translation, you will hear the audio in English with your chosen voice.

	Enjoy exploring different languages, voices, and speeds with the text-to-speech conversion!
	"""

	# Sidebar: heading first, then the guide.
	sidebar = st.sidebar
	sidebar.header("Configuration & Instructions")
	sidebar.markdown(SIDEBAR_GUIDE_MD)

	# User input for text, language, and voice settings
	input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")

	# One-letter Kokoro language codes mapped to the labels listed in the
	# sidebar. The selectbox displays the label but still returns the code, so
	# every later use of `lang_code` is unaffected.
	LANGUAGE_LABELS = {
	    'a': "🇺🇸 English (American English)",
	    'b': "🇬🇧 British English",
	    'e': "🇪🇸 Spanish",
	    'f': "🇫🇷 French",
	    'h': "🇮🇳 Hindi",
	    'i': "🇮🇹 Italian",
	    'p': "🇧🇷 Portuguese (Brazilian)",
	    'z': "🇨🇳 Chinese (Mandarin)",
	    'j': "🇯🇵 Japanese",
	}
	lang_code = st.selectbox(
	    "Select Language",
	    list(LANGUAGE_LABELS),
	    format_func=LANGUAGE_LABELS.get,
	)

	# Change voice options as per model
	VOICE_OPTIONS = [
	    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
	    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
	    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
	    'am_onyx', 'am_puck', 'am_santa',
	    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
	    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
	    'ef_dora',
	    'em_alex', 'em_santa',
	    'ff_siwis',
	    'hf_alpha', 'hf_beta',
	    'hm_omega', 'hm_psi',
	    'if_sara',
	    'im_nicola',
	    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
	    'jm_kumo',
	    'pf_dora',
	    'pm_alex', 'pm_santa',
	    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
	    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
	]
	voice = st.selectbox("Select Voice", VOICE_OPTIONS)
	speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


	@st.cache_resource
	def _load_pipeline(code):
	    """Return the Kokoro pipeline for language `code`, built once per code.

	    Streamlit re-executes this script on every widget interaction, so an
	    uncached KPipeline(...) call would be repeated on each rerun.
	    NOTE(review): st.cache_resource shares the object across sessions;
	    confirm KPipeline is safe to share that way.
	    """
	    return KPipeline(lang_code=code)


	# Initialize the TTS pipeline with user-selected language
	pipeline = _load_pipeline(lang_code)

	# Ask the user for their OpenAI API key (masked input)
	openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")

	# Function to translate text to English using OpenAI's Chat API
	def translate_to_english(api_key, text, lang_code):
	    """Translate `text` to English with an OpenAI chat model.

	    Args:
	        api_key: OpenAI API key used for the request.
	        text: Text to translate.
	        lang_code: One-letter language code of `text` as chosen in the UI.

	    Returns:
	        The translated text. Returns `text` unchanged when it is blank, when
	        the request fails (the error is shown with st.error), or when the
	        model returns no content.
	    """
	    # Nothing to translate: skip the API round trip.
	    if not text or not text.strip():
	        return text

	    # The prompt names the language; a bare code such as "h" is opaque.
	    language_names = {
	        'a': "American English",
	        'b': "British English",
	        'e': "Spanish",
	        'f': "French",
	        'h': "Hindi",
	        'i': "Italian",
	        'p': "Brazilian Portuguese",
	        'z': "Mandarin Chinese",
	        'j': "Japanese",
	    }
	    source_language = language_names.get(lang_code, lang_code)

	    # Construct the prompt for translation
	    prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant that translates text."},
	        {"role": "user", "content": prompt},
	    ]

	    try:
	        if hasattr(openai, "OpenAI"):
	            # openai>=1.0: the module-level ChatCompletion API was removed;
	            # requests go through a client object instead.
	            client = openai.OpenAI(api_key=api_key)
	            response = client.chat.completions.create(model="gpt-4", messages=messages)
	            content = response.choices[0].message.content
	        else:
	            # Legacy openai<1.0 interface.
	            openai.api_key = api_key
	            response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
	            content = response['choices'][0]['message']['content']
	    except Exception as e:
	        # Best effort: report the problem and carry on with the input text.
	        st.error(f"Error occurred during translation: {e}")
	        return text  # Fallback to original text in case of an error

	    translated_text = (content or "").strip()
	    # An empty reply cannot be synthesized, so fall back to the input.
	    return translated_text or text

	# Generate Audio function
	def generate_audio(text, lang_code, voice, speed):
	    """Synthesize `text` and return the speech as an in-memory WAV file.

	    Args:
	        text: Text to speak; it is split into segments on newlines.
	        lang_code: Kokoro language code the text should be read in.
	        voice: Kokoro voice name.
	        speed: Speech speed multiplier.

	    Returns:
	        An io.BytesIO positioned at 0 holding a 24 kHz WAV with the audio of
	        every segment, in order.

	    Raises:
	        ValueError: If the pipeline produced no audio (e.g. blank text).
	    """
	    # Honor `lang_code`: reuse the module-level pipeline only when it was
	    # built for the requested language, otherwise build a matching one.
	    # NOTE(review): assumes KPipeline exposes its language as `.lang_code`;
	    # if it does not, a fresh pipeline is built on every call.
	    if getattr(pipeline, "lang_code", None) == lang_code:
	        tts = pipeline
	    else:
	        tts = KPipeline(lang_code=lang_code)

	    # Save audio to in-memory buffer, appending each segment as it arrives
	    # so multi-line text is kept in full rather than only one segment.
	    buffer = io.BytesIO()
	    segments_written = 0
	    with sf.SoundFile(buffer, mode='w', samplerate=24000, channels=1, format='WAV') as wav_file:
	        for _graphemes, _phonemes, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+'):
	            if audio is None:
	                continue
	            wav_file.write(audio)
	            segments_written += 1

	    if not segments_written:
	        raise ValueError("No audio was generated; the input text may be empty.")

	    buffer.seek(0)
	    return buffer

	# Generate and display the audio file
	if st.button('Generate Audio'):
	    if not input_text.strip():
	        # Blank text yields no audio, so stop before any synthesis.
	        st.warning("Please enter some text to convert to speech.")
	    elif not openai_api_key:
	        # Without a key the click used to do nothing; say why instead.
	        st.warning("Please enter your OpenAI API key to generate audio.")
	    else:
	        st.write("Generating speech for the original text...")
	        audio_buffer = generate_audio(input_text, lang_code, voice, speed)

	        # Display Audio player for the original language
	        st.audio(audio_buffer, format='audio/wav')

	        # Optional: Save the generated audio file for download (Original Text)
	        st.download_button(
	            label="Download Audio (Original Text)",
	            data=audio_buffer,
	            file_name="generated_speech_original.wav",
	            mime="audio/wav"
	        )

	        # Translate the input text to English using OpenAI
	        translated_text = translate_to_english(openai_api_key, input_text, lang_code)

	        # Generate audio for the translated English text
	        translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

	        # Display Audio for the translated text
	        st.write(f"Translated Text: {translated_text}")
	        st.audio(translated_audio_buffer, format='audio/wav')

	        # Optional: Save the generated audio file for download (Translated Text)
	        st.download_button(
	            label="Download Audio (Translated to English)",
	            data=translated_audio_buffer,
	            file_name="generated_speech_translated.wav",
	            mime="audio/wav"
	        )