import os
from pathlib import Path
from typing import Iterator, Tuple

import gradio as gr
import spaces
from transformers import pipeline, Pipeline
from huggingface_hub import repo_exists

is_hf_space = os.getenv("IS_HF_SPACE")

model_ids = [
    "",
    "mozilla-ai/whisper-small-gl (Galician)",
    "mozilla-ai/whisper-small-el (Greek)",
    "openai/whisper-tiny (Multilingual)",
    "openai/whisper-small (Multilingual)",
    "openai/whisper-medium (Multilingual)",
    "openai/whisper-large-v3 (Multilingual)",
    "openai/whisper-large-v3-turbo (Multilingual)",
]


def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
    """Load a Whisper model from a local directory, returning (pipeline, status message)."""
    if not Path(model_dir).is_dir():
        return None, f"⚠️ Couldn't find local model directory: {model_dir}"

    from transformers import (
        WhisperProcessor,
        WhisperTokenizer,
        WhisperFeatureExtractor,
        WhisperForConditionalGeneration,
    )

    processor = WhisperProcessor.from_pretrained(model_dir)
    tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
    feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
    model = WhisperForConditionalGeneration.from_pretrained(model_dir)

    return pipeline(
        task="automatic-speech-recognition",
        model=model,
        processor=processor,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
    ), f"✅ Local model has been loaded from {model_dir}."


def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
    """Load a Whisper model from the Hugging Face Hub, returning (pipeline, status message)."""
    if not repo_exists(model_repo_id):
        return (
            None,
            f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If it's a private repo, make sure you are logged in locally.",
        )
    return pipeline(
        "automatic-speech-recognition",
        model=model_repo_id,
    ), f"✅ HF Model {model_repo_id} has been loaded."


def load_model(
    dropdown_model_id: str, hf_model_id: str, local_model_id: str
) -> Iterator[Tuple[Pipeline | None, str]]:
    """Load a model from exactly one of the three inputs, yielding interim status updates."""
    if dropdown_model_id and not hf_model_id and not local_model_id:
        # Strip the language annotation, e.g. "openai/whisper-tiny (Multilingual)" -> "openai/whisper-tiny".
        dropdown_model_id = dropdown_model_id.split(" (")[0]
        yield None, f"Loading {dropdown_model_id}..."
        yield _load_hf_model(dropdown_model_id)
    elif hf_model_id and not local_model_id and not dropdown_model_id:
        yield None, f"Loading {hf_model_id}..."
        yield _load_hf_model(hf_model_id)
    elif local_model_id and not hf_model_id and not dropdown_model_id:
        yield None, f"Loading {local_model_id}..."
        yield _load_local_model(local_model_id)
    else:
        yield (
            None,
            "⚠️ Please select or fill in exactly one of the options above.",
        )


@spaces.GPU
def transcribe(pipe: Pipeline, audio: str) -> str:
    """Transcribe the audio file at the given path (gr.Audio uses type="filepath")."""
    text = pipe(audio)["text"]
    return text


def setup_gradio_demo():
    with gr.Blocks() as demo:
        gr.Markdown(
            """# 🗣️ Speech-to-Text Transcription
            ### 1. Select which model to load from one of the options below.
            ### 2. Load the model by clicking the Load model button.
            ### 3. Record a message or upload an audio file.
            ### 4. Click Transcribe to see the transcription generated by the model.
            """
        )
        ### Model selection ###
        with gr.Row():
            with gr.Column():
                dropdown_model = gr.Dropdown(
                    choices=model_ids, label="Option 1: Select a model"
                )
            with gr.Column():
                user_model = gr.Textbox(
                    label="Option 2: Paste HF model id",
                    placeholder="my-username/my-whisper-tiny",
                )
            # The local-path option is hidden when running on a Hugging Face Space.
            with gr.Column(visible=not is_hf_space):
                local_model = gr.Textbox(
                    label="Option 3: Paste local path to model directory",
                    placeholder="artifacts/my-whisper-tiny",
                )

        load_model_button = gr.Button("Load model")
        model_loaded = gr.Markdown()

        ### Transcription ###
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Record a message / Upload audio file",
            show_download_button=True,
            max_length=30,
        )
        transcribe_button = gr.Button("Transcribe")
        transcribe_output = gr.Text(label="Output")

        ### Event listeners ###
        model = gr.State()
        load_model_button.click(
            fn=load_model,
            inputs=[dropdown_model, user_model, local_model],
            outputs=[model, model_loaded],
        )
        transcribe_button.click(
            fn=transcribe, inputs=[model, audio_input], outputs=transcribe_output
        )
    demo.launch()


if __name__ == "__main__":
    setup_gradio_demo()