Spaces:

AI-trainer1
/

text_to_speech_test1

Build error

App Files Files Community

text_to_speech_test1 / app.py

AI-trainer1

Update app.py

3cda040 verified 6 months ago

raw

history blame contribute delete

2.51 kB

	import gradio as gr
	from TTS.api import TTS
	import os
	import tempfile
	import sounddevice as sd
	from scipy.io.wavfile import write
	from concurrent.futures import ThreadPoolExecutor

	# Accept Coqui's terms of service non-interactively and cap OpenMP at 2 threads.
	# NOTE(review): OMP_NUM_THREADS is assigned after `TTS` has already been
	# imported above -- confirm the thread cap actually takes effect.
	os.environ.update({"COQUI_TOS_AGREED": "1", "OMP_NUM_THREADS": "2"})

	# Load the XTTS-v2 model without a GPU, then pin it to the CPU explicitly.
	tts = TTS(
	    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
	    gpu=False,
	)
	tts.to("cpu")

	# Display name -> language code handed to the model.
	# Insertion order is the order in which the names are listed to the user.
	LANGUAGES = dict(
	    English="en",
	    Spanish="es",
	    German="de",
	    French="fr",
	    Italian="it",
	    Hindi="hi",
	    Russian="ru",
	    Turkish="tr",
	    Japanese="ja",
	    Korean="ko",
	    Hungarian="hu",
	)

	# Function to generate voice
	def generate_voice(text, speaker_audio, language):
	    """Synthesize ``text`` in the voice of ``speaker_audio`` and return a WAV path.

	    Args:
	        text: Text to speak.
	        speaker_audio: Path of the reference recording, passed to the model
	            as ``speaker_wav``.
	        language: Display name looked up in ``LANGUAGES``; names that are not
	            in the mapping fall back to ``"en"``.

	    Returns:
	        Path of the temporary ``.wav`` file the model wrote to.

	    Raises:
	        gr.Error: If ``text`` is blank or no speaker audio was supplied.
	    """
	    # Fail early with a message the UI can show instead of an error from
	    # deep inside the model.
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")
	    if not speaker_audio:
	        raise gr.Error("Please provide a speaker audio sample.")

	    # NamedTemporaryFile creates the file itself; the deprecated
	    # tempfile.mktemp only returned a name, which another process could
	    # claim first. delete=False keeps the file so the caller can read it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name

	    tts.tts_to_file(
	        text=text,
	        speaker_wav=speaker_audio,
	        file_path=output_path,
	        language=LANGUAGES.get(language, "en"),
	        # NOTE(review): forwarded as an extra keyword -- confirm the model
	        # accepts it and that it affects the output sample rate.
	        sample_rate=44100,
	    )
	    return output_path

	# Function to record audio from the mic
	def record_audio(duration=10, filename="mic_input.wav"):
	    """Record mono audio through ``sounddevice`` and save it as a WAV file.

	    Args:
	        duration: Length of the recording in seconds.
	        filename: Destination path of the WAV file.

	    Returns:
	        ``filename``, once the recording has been written.
	    """
	    sample_rate = 44100  # Hz
	    print("Recording...")
	    frame_count = int(duration * sample_rate)
	    recording = sd.rec(frame_count, samplerate=sample_rate, channels=1)
	    sd.wait()  # wait for the recording to finish before saving
	    write(filename, sample_rate, recording)
	    print(f"Recording saved as {filename}")
	    return filename

	# Gradio interface
	with gr.Blocks() as demo:
	    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

	    # NOTE(review): the original nesting was lost; the row is assumed to hold
	    # the four input widgets -- confirm the intended layout.
	    with gr.Row():
	        text_input = gr.Textbox(
	            label="Enter Text",
	            placeholder="Type the text you want to synthesize...",
	        )
	        speaker_audio_input = gr.Audio(
	            label="Upload Speaker Audio (WAV)",
	            type="filepath",
	        )
	        language_dropdown = gr.Dropdown(
	            label="Select Output Language",
	            choices=list(LANGUAGES),
	            value="English",
	        )
	        mic_button = gr.Button("Record from Mic")

	    output_audio = gr.Audio(label="Generated Voice", type="filepath")
	    generate_button = gr.Button("Generate Voice")

	    # The mic button records for 10 seconds and feeds the saved file path
	    # into the speaker-audio widget.
	    mic_button.click(
	        fn=lambda: record_audio(duration=10),
	        inputs=[],
	        outputs=speaker_audio_input,
	    )

	    # The generate button synthesizes the text with the chosen speaker
	    # sample and language, then shows the resulting audio file.
	    generate_button.click(
	        fn=generate_voice,
	        inputs=[text_input, speaker_audio_input, language_dropdown],
	        outputs=output_audio,
	    )

	# Launch the app
	demo.launch(
	    server_name="0.0.0.0",
	    server_port=7860,
	    share=True,
	)