import os
import gradio as gr
from huggingface_hub import InferenceClient

# Build one InferenceClient per provider, in the order LLaMA (SambaNova),
# MiniMax (Novita), Mistral (Together). Each reads the API key from the
# HF_TOKEN environment variable; a missing variable raises KeyError here.
llama_client, minimax_client, mistral_client = [
    InferenceClient(provider=provider_name, api_key=os.environ["HF_TOKEN"])
    for provider_name in ("sambanova", "novita", "together")
]

# Helper: text-only view of a conversation for models without image support
def _text_only(messages):
    """Return a copy of *messages* with list-style content reduced to text.

    Turns created for the LLaMA branch store ``content`` as a list of typed
    parts (text and, optionally, ``image_url``). Here the ``text`` parts are
    joined with newlines and every other part is dropped; messages whose
    content is already a plain value are copied unchanged.
    """
    flattened = []
    for message in messages:
        content = message.get("content")
        if isinstance(content, list):
            content = "\n".join(
                part.get("text", "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        flattened.append({**message, "content": content})
    return flattened


# Context-aware response function
def chat_with_model(model_choice, prompt, image_url, chat_history):
    """Send *prompt* to the selected model and return ``(text, history)``.

    Args:
        model_choice: Label of the model to query; must be one of the three
            labels handled below, otherwise an "unsupported" message is
            returned.
        prompt: User text. Empty or whitespace-only input is rejected.
        image_url: Optional image URL. Only the LLaMA 4 branch attaches it;
            the other branches ignore it.
        chat_history: Prior conversation as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None``.

    Returns:
        A ``(text, history)`` tuple. On success ``text`` is the model reply
        and ``history`` is the prior history followed by the new user and
        assistant turns. On rejected input, an unsupported model, or any
        exception raised while calling the provider, ``text`` describes the
        problem and ``history`` equals the prior history.

    The list passed as *chat_history* is never mutated: a failed request must
    not leave a user turn without a reply in the stored conversation, which
    would make every later request send consecutive user messages.
    """
    # Work on a copy so a failed call cannot corrupt the caller's state.
    history = list(chat_history) if chat_history else []

    if not prompt or not prompt.strip():
        return "Please enter a text prompt.", history

    try:
        # === LLaMA 4 ===
        if model_choice == "LLaMA 4 (SambaNova)":
            client = llama_client
            model_id = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
            parts = [{"type": "text", "text": prompt}]
            if image_url and image_url.strip():
                parts.append(
                    {"type": "image_url", "image_url": {"url": image_url.strip()}}
                )
            user_turn = {"role": "user", "content": parts}
            outgoing = [*history, user_turn]

        # === MiniMax ===
        elif model_choice == "MiniMax M1 (Novita)":
            client = minimax_client
            model_id = "MiniMaxAI/MiniMax-M1-80k"
            user_turn = {"role": "user", "content": prompt}
            # Earlier LLaMA turns may hold image parts; send text only.
            outgoing = [*_text_only(history), user_turn]

        # === Mistral ===
        elif model_choice == "Mistral Mixtral-8x7B (Together)":
            client = mistral_client
            model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
            user_turn = {"role": "user", "content": prompt}
            # Earlier LLaMA turns may hold image parts; send text only.
            outgoing = [*_text_only(history), user_turn]

        else:
            return "Unsupported model selected.", history

        response = client.chat.completions.create(model=model_id, messages=outgoing)
        reply = response.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure as text and keep the prior history.
        return f"Error: {e}", history

    # Commit both turns only after the provider call succeeded.
    return reply, [*history, user_turn, {"role": "assistant", "content": reply}]

# Gradio UI: one Blocks page that feeds the model selector, prompt and
# optional image URL into chat_with_model and shows the reply in a textbox.
# Components are declared in the order they should be created on the page.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Multi-Model Context-Aware Chatbot")
    gr.Markdown("Supports LLaMA 4 (with optional image), MiniMax, and Mistral. Conversation memory is preserved.")

    # These labels are passed to chat_with_model as model_choice and must match
    # the strings it compares against; any other value is reported there as
    # "Unsupported model selected."
    model_dropdown = gr.Dropdown(
        choices=[
            "LLaMA 4 (SambaNova)",
            "MiniMax M1 (Novita)",
            "Mistral Mixtral-8x7B (Together)"
        ],
        value="LLaMA 4 (SambaNova)",
        label="Select Model"
    )

    prompt_input = gr.Textbox(label="Text Prompt", placeholder="Ask something...", lines=2)
    # Only the LLaMA branch of chat_with_model reads this value.
    image_url_input = gr.Textbox(label="Optional Image URL (for LLaMA only)", placeholder="https://example.com/image.jpg")

    submit_btn = gr.Button("Generate Response")
    reset_btn = gr.Button("🔄 Reset Conversation")
    output_box = gr.Textbox(label="Response", lines=8)

    # Conversation history: passed to chat_with_model as chat_history and
    # replaced by the second value it returns. Starts as an empty list.
    state = gr.State([])

    # Input order must match chat_with_model's parameter order, and output
    # order must match its (text, history) return tuple.
    submit_btn.click(
        fn=chat_with_model,
        inputs=[model_dropdown, prompt_input, image_url_input, state],
        outputs=[output_box, state]
    )

    # Reset: show a notice in the response box and replace the stored history
    # with a fresh empty list.
    reset_btn.click(
        fn=lambda: ("Conversation reset. You can start a new one.", []),
        inputs=[],
        outputs=[output_box, state]
    )

# Called at module level with no __main__ guard, so it runs whenever this file
# is executed or imported.
demo.launch()