from engine import Piper
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
import os
import json
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
from espeak_phonemizer import Phonemizer

MAX_TXT_LEN = 100
# Fallback speaker list; replaced below with the names reported by the BSC model.
SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']

# Catalan phonemizer, used to show the phonemes alongside the synthesized audio.
fonemitzador = Phonemizer("ca")


def carrega_bsc():
    # Multi-speaker VITS model from the BSC (Projecte Aina); no external vocoder needed.
    model_path = os.getcwd() + "/models/bsc/best_model.pth"
    config_path = os.getcwd() + "/models/bsc/config.json"
    speakers_file_path = os.getcwd() + "/models/bsc/speakers.pth"
    vocoder_path = None
    vocoder_config_path = None

    synthesizer = Synthesizer(
        model_path,
        config_path,
        speakers_file_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    return synthesizer


def carrega_collectivat():
    # FastSpeech model from Col·lectivat paired with a HiFi-GAN vocoder.
    model_path = os.getcwd() + "/models/collectivat/fast-speech_best_model.pth"
    config_path = os.getcwd() + "/models/collectivat/fast-speech_config.json"
    vocoder_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
    vocoder_config_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_config.json"
    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    return synthesizer


def carrega_piper():
    # Piper VITS model exported to ONNX.
    return Piper(os.getcwd() + "/models/piper/ca-upc_ona-x-low.onnx")


model_bsc = carrega_bsc()
SPEAKERS = model_bsc.tts_model.speaker_manager.speaker_names
model_collectivat = carrega_collectivat()
model_piper = carrega_piper()


def tts(text, speaker_idx):
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    # synthesize with the three models
    wav_bsc = model_bsc.tts(text, speaker_idx)
    wav_coll = model_collectivat.tts(text)
    wav_piper = model_piper.synthesize(text)
    #return (model_bsc.tts_config.audio["sample_rate"], wav_bsc), (22000, wav_coll), (16000, wav_piper)

    # write each result to a temporary WAV file and return the file paths
    fp_bsc = ""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_bsc.save_wav(wav_bsc, fp)
        fp_bsc = fp.name

    fp_coll = ""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        model_collectivat.save_wav(wav_coll, fp)
        fp_coll = fp.name

    fp_piper = ""
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        fp.write(wav_piper)
        fp_piper = fp.name

    fonemes = fonemitzador.phonemize(text)
    return fonemes, fp_bsc, fp_coll, fp_piper


description = """
With this app you can synthesize text to speech using the latest free models for Catalan.

1. Multi-speaker VITS model trained by the BSC (Projecte Aina) https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker
2. FastSpeech model trained by Col·lectivat https://github.com/CollectivaT-dev/TTS-API
3. VITS model trained by Piper/Home Assistant https://github.com/rhasspy/piper

The last two models were trained on the Ona voice from FestCAT, which served as the basis for the Catalan voices of Festival.

The first model contains many voices of varying quality. You can select a different one from the dropdown. The Ona voice is selected by default for the comparison.

This app uses the latest state of espeak as improved by Carme Armentano from the BSC https://github.com/projecte-aina/espeak-ng
"""
article = ""

iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Textbox(
            label="Text",
            value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
        ),
        gr.Dropdown(label="Select a speaker", choices=SPEAKERS, value="ona"),
    ],
    outputs=[
        gr.Markdown(label="Phonemes"),
        gr.Audio(label="BSC VITS", type="filepath"),
        gr.Audio(label="Collectivat Fastspeech", type="filepath"),
        gr.Audio(label="Piper VITS", type="filepath"),
    ],
    title="Comparison of free Catalan speech synthesis",
    description=description,
    article=article,
    allow_flagging="never",
    layout="vertical",
    live=False,
)
iface.launch(server_name="0.0.0.0", server_port=7860)
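
# A minimal sketch of exercising the synthesis pipeline without the Gradio UI,
# for example as a quick smoke test of the three models. This is an assumption,
# not part of the original app: it reuses the tts() function defined above with
# the default "ona" speaker and a sample sentence chosen here for illustration,
# and it would have to run instead of (or before) the blocking iface.launch() call.
#
# fonemes, wav_bsc_path, wav_coll_path, wav_piper_path = tts(
#     "L'Èlia i l'Alí a l'aula.", "ona"
# )
# print(fonemes)
# print(wav_bsc_path, wav_coll_path, wav_piper_path)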