import gradio as gr
from TTS.api import TTS
import os
import tempfile
import sounddevice as sd
from scipy.io.wavfile import write
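
# These imports assume the Coqui TTS package, gradio, sounddevice and scipy are
# installed; sounddevice also relies on the PortAudio system library.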

# Agree to Coqui's terms
os.environ["COQUI_TOS_AGREED"] = "1"
os.environ["OMP_NUM_THREADS"] = "2"  # Limit OpenMP to 2 CPU threads (adjust to your core count)

# Load the XTTS v2 model on the CPU (weights are downloaded on first use)
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to("cpu")
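
# Optional (an assumption, not in the original script): PyTorch's intra-op
# thread count can also be set from code if the environment variable isn't
# picked up early enough. A minimal sketch, left commented out:
# import torch
# torch.set_num_threads(2)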

# Supported languages by the model
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "German": "de",
    "French": "fr",
    "Italian": "it",
    "Hindi": "hi",
    "Russian": "ru",
    "Turkish": "tr",
    "Japanese": "ja",
    "Korean": "ko",
    "Hungarian": "hu",
}
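
# Note: the XTTS v2 model card lists additional supported languages
# (e.g. pt, pl, nl, cs, ar, zh-cn) that can be added to this mapping.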

# Function to generate voice
def generate_voice(text, speaker_audio, language):
    # Create a named temp file for the output (tempfile.mktemp is deprecated and racy)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    # tts_to_file takes no sample_rate argument; XTTS v2 renders at its
    # native 24 kHz output rate.
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio,
        file_path=output_path,
        language=LANGUAGES.get(language, "en"),
    )
    return output_path
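
# Usage sketch outside the UI (hypothetical file name, for illustration only):
#   wav_path = generate_voice("Hello there!", "reference_speaker.wav", "English")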

# Function to record audio from the mic
def record_audio(duration=10, filename="mic_input.wav"):
    fs = 44100  # Sample rate in Hz
    print("Recording...")
    # Record 16-bit PCM so scipy writes a standard, widely readable WAV file
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype="int16")
    sd.wait()  # Block until recording is finished
    write(filename, fs, audio_data)
    print(f"Recording saved as {filename}")
    return filename
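
# The recorded clip doubles as the XTTS speaker reference; Coqui's docs
# recommend roughly six seconds or more of clean speech for good cloning.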

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    with gr.Row():
        text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
        speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")
        language_dropdown = gr.Dropdown(
            label="Select Output Language",
            choices=list(LANGUAGES.keys()),
            value="English"
        )
        mic_button = gr.Button("Record from Mic")

    output_audio = gr.Audio(label="Generated Voice", type="filepath")

    generate_button = gr.Button("Generate Voice")

    mic_button.click(
        fn=lambda: record_audio(duration=10),
        inputs=[],
        outputs=speaker_audio_input,
    )

    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input, language_dropdown],
        outputs=output_audio
    )
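
    # Optional (an assumption, not in the original script): enable Gradio's
    # request queue so slow CPU syntheses from multiple users run in order.
    # demo.queue()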

# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)