Spaces:
Build error
Build error
File size: 2,365 Bytes
c636952 a416ccf 9581ca3 22fdf85 9581ca3 22fdf85 9581ca3 a416ccf 22fdf85 d8fe51c a416ccf 22fdf85 97fb01c 22fdf85 a416ccf d8fe51c a416ccf 22fdf85 a416ccf c636952 22fdf85 d8fe51c c636952 a416ccf d8fe51c c636952 a416ccf d8fe51c 22fdf85 d8fe51c a416ccf d8fe51c a416ccf d8fe51c 22fdf85 a416ccf 22fdf85 a416ccf c636952 d8fe51c 22fdf85 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
from TTS.api import TTS
import os
import tempfile
import sounddevice as sd
from scipy.io.wavfile import write
# Agree to Coqui's terms. This is set before the model is constructed below;
# presumably the TTS library checks it when fetching the model — confirm.
os.environ["COQUI_TOS_AGREED"] = "1"
# Load the XTTS-v2 model once at import time and keep it on the CPU
# (gpu=False, followed by an explicit move to "cpu"). This single instance is
# the one generate_voice uses for every request.
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
tts.to("cpu")
# Languages offered in the UI: display name -> language code passed to the
# model. Each display name must appear only once; a repeated key in a dict
# literal silently overwrites the earlier entry.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
    "Hindi": "hi",
    "Russian": "ru",
    "Turkish": "tr",
    "Japanese": "ja",
    "Korean": "ko",
    "Hungarian": "hu",
}
# Function to generate voice
def generate_voice(text, speaker_audio, language):
    """Synthesize *text* using *speaker_audio* as the voice reference.

    Args:
        text: Text to synthesize.
        speaker_audio: Path to the reference recording, forwarded to the
            model as ``speaker_wav``.
        language: Display name looked up in ``LANGUAGES``; names that are
            not in the mapping fall back to ``"en"``.

    Returns:
        Path of a newly created temporary ``.wav`` file with the result.

    Raises:
        gr.Error: If *text* is blank or no speaker audio was supplied.
    """
    # Reject unusable input up front so the user gets a readable message
    # instead of a failure from inside the model call.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not speaker_audio:
        raise gr.Error("Please provide a speaker audio sample.")

    # tempfile.mktemp() only invents a name (deprecated, race-prone).
    # NamedTemporaryFile actually creates the file; delete=False keeps it on
    # disk after the handle is closed so it can be written and returned.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio,
        file_path=output_path,
        language=LANGUAGES.get(language, "en"),
    )
    return output_path
# Function to record audio from the mic
def record_audio(duration=10, filename="mic_input.wav"):
    """Record a mono clip with sounddevice and save it as a WAV file.

    Args:
        duration: Length of the recording in seconds.
        filename: Destination path of the WAV file. The default is a fixed
            relative name, so every call that relies on it writes to the
            same path.

    Returns:
        The ``filename`` that was written.
    """
    sample_rate = 44100  # Hz
    print("Recording...")
    frame_count = int(duration * sample_rate)
    samples = sd.rec(frame_count, samplerate=sample_rate, channels=1)
    # Block until the recording has finished before touching the samples.
    sd.wait()
    write(filename, sample_rate, samples)
    print(f"Recording saved as {filename}")
    return filename
# Gradio interface: text, speaker-audio and language inputs, a button that
# records a reference clip through record_audio, and a button that runs
# generate_voice and shows the resulting audio.
# NOTE(review): which components sit inside gr.Row() is an assumption —
# confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")
    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
        speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")
        # The dropdown offers the display names; generate_voice maps the
        # selected name back to a language code via LANGUAGES.
        language_dropdown = gr.Dropdown(
            label="Select Output Language",
            choices=list(LANGUAGES.keys()),
            value="English"
        )
    mic_button = gr.Button("Record from Mic")
    output_audio = gr.Audio(label="Generated Voice", type="filepath")
    generate_button = gr.Button("Generate Voice")
    # Record a fixed 10-second clip and load the saved file into the speaker
    # audio input.
    # NOTE(review): record_audio captures through sounddevice in the process
    # running this script, i.e. on the host machine rather than in the
    # visitor's browser — confirm this is intended for a hosted deployment.
    mic_button.click(
        fn=lambda: record_audio(duration=10),
        inputs=[],
        outputs=speaker_audio_input,
    )
    # Synthesize speech from the three inputs and show the generated file.
    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input, language_dropdown],
        outputs=output_audio
    )
# Launch the app on port 7860, bound to 0.0.0.0, with share=True.
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|