import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel  # LoRA adapter support

# Load the tokenizer and base model
model_name = "MrSimple07/llama_chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the LoRA adapter weights on top of the base model
# (assumes the same repo also hosts the PEFT adapter files)
model = PeftModel.from_pretrained(base_model, model_name)

# Ensure the model is in evaluation mode (disables dropout)
model.eval()

# Chat function: tokenize the message, generate a reply, decode it
def chatbot_response(message):
    inputs = tokenizer(message, return_tensors="pt").input_ids
    with torch.no_grad():  # no gradients needed for inference
        outputs = model.generate(inputs, max_length=100, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio interface
# (gr.inputs.Textbox was removed in Gradio 3+; use gr.Textbox directly)
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Textbox(lines=7, label="Input your message"),
    outputs="text",
    title="LLaMA Chatbot with LoRA",
    description="This is a chatbot trained with LoRA on the LLaMA model.",
)

iface.launch()
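# Example usage (hypothetical prompt, shown for illustration only):
# calling chatbot_response directly returns the decoded model reply, which
# is a handy smoke test of generation without launching the Gradio UI.
# print(chatbot_response("Hello! What can you help me with?"))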