{get_chat_title(list(MODELS.keys())[0])}

import gradio as gr
from llama_cpp import Llama
import requests

# Define available models
MODELS = {
    "Llama-3.2-3B": {
        "repo_id": "lmstudio-community/Llama-3.2-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Llama-3.2-5B": {
        "repo_id": "lmstudio-community/Llama-3.2-1B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Phi-3.5-mini": {
        "repo_id": "bartowski/Phi-3.5-mini-instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Granite-3B": {
        "repo_id": "lmstudio-community/granite-3.0-3b-a800m-instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Qwen2.5-3B": {
        "repo_id": "lmstudio-community/Qwen2.5-3B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "SmolLM2-1.7B": {
        "repo_id": "HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Qwen2.5-1.5B": {
        "repo_id": "lmstudio-community/Qwen2.5-1.5B-Instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "Granite-1B": {
        "repo_id": "lmstudio-community/granite-3.0-1b-a400m-instruct-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    },
    "AMD-OLMo-1B": {
        "repo_id": "lmstudio-community/AMD-OLMo-1B-SFT-GGUF",
        "filename": "*Q4_K_M.gguf",
        "chat_format": "chatml"
    }
}

# Initialize with default model
current_model = None

def load_model(model_name):
    global current_model
    model_info = MODELS[model_name]
    current_model = Llama.from_pretrained(
        repo_id=model_info["repo_id"],
        filename=model_info["filename"],
        verbose=True,
        n_ctx=32768,
        n_threads=2,
        chat_format=model_info["chat_format"]
    )
    return current_model

# Initialize with first model
current_model = load_model(list(MODELS.keys())[0])

def respond(
    message,
    history,
    model_name,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    global current_model
    
    # Load new model if changed
    if current_model is None or model_name not in str(current_model.model_path):
        current_model = load_model(model_name)
    
    # Start with system message
    messages = []
    if system_message and system_message.strip():
        messages.append({"role": "system", "content": system_message})

    # Add chat history
    if history:
        messages.extend(history)
    
    # Add current message
    messages.append({"role": "user", "content": message})

    # Generate response
    response = current_model.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p
    )
    
    message_repl = ""
    for chunk in response:
        if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
            message_repl = message_repl + chunk['choices'][0]["delta"]["content"]
        yield message_repl

def get_chat_title(model_name):
    return f"{model_name} < - Load different model in Additional Inputs"

with gr.Blocks() as demo:
    with gr.Row():
        title = gr.HTML(value=f"<h1>{get_chat_title(list(MODELS.keys())[0])}</h1>")
    
    with gr.Row():
        chatbot = gr.Chatbot(
            value=[],
            type="messages",
            label="Chat Messages"
        )
    
    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your message here...",
            lines=1
        )
        submit = gr.Button("Submit")
    
    with gr.Accordion("Additional Inputs", open=False):
        model_selector = gr.Dropdown(
            choices=list(MODELS.keys()),
            value=list(MODELS.keys())[0],
            label="Select Model",
            interactive=True,
            allow_custom_value=False,
            elem_id="model_selector",
            show_label=True
        )
        system_msg = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
        max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
    
    gr.Markdown(
        "GGUF is popular model format, try HG models localy in: [LM Studio AI](https://lmstudio.ai) for PC | PocketPal AI ([Android](https://play.google.com/store/apps/details?id=com.pocketpalai) & [iOS](https://play.google.com/store/apps/details?id=com.pocketpalai)) on Tablet or Mobile"
    )
    
    def update_title(model_name):
        return f"<h1>{get_chat_title(model_name)}</h1>"
    
    model_selector.change(
        fn=update_title,
        inputs=[model_selector],
        outputs=[title]
    )
    
    def submit_message(message, chat_history, model_name, system_message, max_tokens, temperature, top_p):
        history = [] if chat_history is None else chat_history
        
        # Add user message first
        history = history + [{"role": "user", "content": message}]
        
        # Then stream the assistant's response
        for response in respond(message, history[:-1], model_name, system_message, max_tokens, temperature, top_p):
            history[-1] = {"role": "user", "content": message}
            history = history + [{"role": "assistant", "content": response}]
            yield history, ""
    
    submit_event = submit.click(
        fn=submit_message,
        inputs=[msg, chatbot, model_selector, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        show_progress=True,
    )
    
    msg.submit(
        fn=submit_message,
        inputs=[msg, chatbot, model_selector, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
        show_progress=True,
    )
    
    demo.theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="purple",
    )

if __name__ == "__main__":
    demo.launch()