"""Minimal Gradio chat UI around the wop/kosmox-gguf model.

Loads the model/tokenizer once at import time, renders conversations through
the tokenizer's Jinja2 chat template, and exposes a simple Blocks interface.
"""

import gradio as gr
# AutoModel has no .generate(); a causal-LM head is required for generation.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "wop/kosmox-gguf"
# NOTE(review): the repo name suggests GGUF weights; plain from_pretrained may
# need a gguf_file=... argument (or a safetensors checkpoint) — verify.
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# This is a Jinja2 template ({% ... %} blocks); str.format cannot render it.
# Install it on the tokenizer so apply_chat_template does the rendering.
tokenizer.chat_template = (
    "{{ bos_token }}{% for message in messages %}"
    "{% if message['from'] == 'human' %}{{' ' + message['value'] + ' '}}"
    "{% elif message['from'] == 'gpt' %}{{' ' + message['value'] + ' '}}"
    "{% else %}{{'<|' + message['from'] + '|> ' + message['value'] + ' '}}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ ' ' }}{% endif %}"
)


def chat(messages):
    """Generate one model reply for a conversation.

    Args:
        messages: list of dicts with ``from`` ('human'/'gpt'/other) and
            ``value`` keys, matching the chat template above.

    Returns:
        The newly generated reply text (special tokens stripped).
    """
    # Render the Jinja2 template properly instead of str.format().
    context = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(context, return_tensors="pt")
    outputs = model.generate(**inputs)
    # generate() echoes the prompt; decode only the newly produced tokens.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# Gradio interface: a chatbot pane, a textbox, and a send button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(
                placeholder="Type your message here...",
                label="Your message",
            )
            send_button = gr.Button("Send")
        with gr.Column():
            chat_output = gr.Textbox(
                label="Chatbot response",
                interactive=False,
            )

    def respond(message, history):
        """Handle one send: update Chatbot history and show the last reply.

        ``history`` is the Chatbot's list of (user, bot) pairs; it must be
        converted to the {'from', 'value'} schema the chat template expects.
        """
        history = history or []
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"from": "human", "value": user_msg})
            if bot_msg:
                messages.append({"from": "gpt", "value": bot_msg})
        messages.append({"from": "human", "value": message})
        response = chat(messages)
        # Chatbot expects (user, bot) pairs, not raw message dicts.
        history.append((message, response))
        return history, response

    send_button.click(respond, [user_input, chatbot], [chatbot, chat_output])

if __name__ == "__main__":
    demo.launch()