lora-llama / app.py
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel # LoRA integration
# Load the tokenizer and model
model_name = "MrSimple07/llama_chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)
# Load LoRA weights
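# PeftModel.from_pretrained attaches the LoRA adapter on top of the base model;
# here the same repo id is assumed to hold both the base checkpoint and the adapter files.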
model = PeftModel.from_pretrained(base_model, model_name)
# Ensure model is in evaluation mode
model.eval()
# Chat function: tokenize the prompt, generate a continuation, and decode it
def chatbot_response(message):
    inputs = tokenizer(message, return_tensors="pt").input_ids
    with torch.no_grad():  # inference only; no gradients needed
        # max_new_tokens bounds the reply length regardless of prompt length
        outputs = model.generate(inputs, max_new_tokens=100, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
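
# Optional local sanity check (hypothetical prompt; uncomment to test without the UI):
# print(chatbot_response("Hello! Tell me about yourself."))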
# Gradio interface (the gr.inputs.* namespace was removed in newer Gradio; use gr.Textbox directly)
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Textbox(lines=7, label="Input your message"),
    outputs="text",
    title="LLaMA Chatbot with LoRA",
    description="This is a chatbot trained with LoRA on the LLaMA model.",
)
iface.launch()
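# Note: when run locally, iface.launch(share=True) would also create a temporary public link.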