import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel  # LoRA adapter support

# Load the tokenizer and base model
model_name = "MrSimple07/llama_chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the LoRA adapter weights on top of the base model
# (assumes the same repo also hosts the PEFT adapter files)
model = PeftModel.from_pretrained(base_model, model_name)

# Ensure the model is in evaluation mode (disables dropout)
model.eval()

# Chat function: tokenize the message, generate a reply, decode it
def chatbot_response(message):
    inputs = tokenizer(message, return_tensors="pt").input_ids
    with torch.no_grad():  # no gradients needed for inference
        outputs = model.generate(inputs, max_length=100, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Gradio interface
# (gr.inputs.Textbox was removed in Gradio 3+; use gr.Textbox directly)
iface = gr.Interface(
    fn=chatbot_response,
    inputs=gr.Textbox(lines=7, label="Input your message"),
    outputs="text",
    title="LLaMA Chatbot with LoRA",
    description="This is a chatbot trained with LoRA on the LLaMA model.",
)

iface.launch()
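# Example usage (hypothetical prompt, shown for illustration only):
# calling chatbot_response directly returns the decoded model reply, which
# is a handy smoke test of generation without launching the Gradio UI.
# print(chatbot_response("Hello! What can you help me with?"))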