# Spaces: Build error
import gradio as gr | |
from TTS.api import TTS | |
import os | |
import tempfile | |
import sounddevice as sd | |
from scipy.io.wavfile import write | |
from concurrent.futures import ThreadPoolExecutor | |
# Agree to Coqui's terms of service non-interactively so the model can be
# fetched without a prompt.
os.environ["COQUI_TOS_AGREED"] = "1"
# Limit CPU threads to 2 (adjust based on your CPU cores).
# NOTE(review): this is assigned after `TTS` has already been imported at the
# top of the file -- confirm the threading runtime still honours it; if not,
# move this assignment above the imports.
os.environ["OMP_NUM_THREADS"] = "2"

# Load the XTTS-v2 model once at import time and keep it on the CPU.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
tts.to("cpu")
# Display name -> language code handed to the model. The insertion order is
# the order in which the names appear in the language dropdown.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
    "Hindi": "hi",
    "Russian": "ru",
    "Turkish": "tr",
    "Japanese": "ja",
    "Korean": "ko",
    "Hungarian": "hu",
}
# Function to generate voice
def generate_voice(text, speaker_audio, language):
    """Synthesize ``text`` in the voice of ``speaker_audio`` and return a WAV path.

    Args:
        text: The text to speak.
        speaker_audio: File path of the reference recording whose voice is
            cloned (the UI supplies a filepath, or ``None`` when nothing was
            uploaded or recorded).
        language: Display name of the output language, looked up in
            ``LANGUAGES``. Unknown names fall back to ``"en"``.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the result.
        The file is not deleted by this function.

    Raises:
        gr.Error: If ``text`` is empty/blank or no speaker audio was given.
    """
    # Fail early with a readable UI message instead of an obscure error from
    # inside the model when a required input is missing.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not speaker_audio:
        raise gr.Error("Please upload or record a speaker audio sample.")

    # tempfile.mktemp() only invents a name and is open to a race; create the
    # file atomically, then close it so the model can write to that path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio,
        file_path=output_path,
        language=LANGUAGES.get(language, "en"),
        # NOTE(review): `sample_rate` is forwarded as an extra keyword
        # argument; confirm the XTTS model actually accepts it.
        sample_rate=44100,
    )
    return output_path
# Function to record audio from the mic
def record_audio(duration=10, filename="mic_input.wav"):
    """Record mono audio through ``sounddevice`` and save it as a WAV file.

    Args:
        duration: Length of the recording in seconds.
        filename: Path the WAV file is written to.

    Returns:
        ``filename``, unchanged, once the recording has been written.
    """
    sample_rate = 44100  # Hz, used for both capture and the WAV header
    print("Recording...")
    frame_count = int(duration * sample_rate)
    samples = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    sd.wait()  # block until the capture buffer has been filled
    write(filename, sample_rate, samples)
    print(f"Recording saved as {filename}")
    return filename
# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    # NOTE(review): the original indentation was lost, so which components
    # belong inside this row is reconstructed -- confirm the intended layout.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text",
            placeholder="Type the text you want to synthesize...",
        )
        speaker_audio_input = gr.Audio(
            label="Upload Speaker Audio (WAV)",
            type="filepath",
        )
        language_dropdown = gr.Dropdown(
            label="Select Output Language",
            choices=list(LANGUAGES),
            value="English",
        )

    mic_button = gr.Button("Record from Mic")
    output_audio = gr.Audio(label="Generated Voice", type="filepath")
    generate_button = gr.Button("Generate Voice")

    # Recording takes no UI inputs; the saved file path is fed into the
    # speaker-audio component.
    mic_button.click(
        fn=lambda: record_audio(duration=10),
        inputs=[],
        outputs=speaker_audio_input,
    )

    # Synthesis reads the three input components and fills the output player.
    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input, language_dropdown],
        outputs=output_audio,
    )

# Launch the app
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)