import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

st.set_page_config(page_title="Hugging Face Chatbot", layout="centered")
st.title("Hugging Face Chatbot with LoRA")


@st.cache_resource
def load_model():
    # Replace this with the actual base model used during LoRA fine-tuning
    base_model_name = "unsloth/Llama-3.2-1B-Instruct"

    # Load the base model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=False)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, trust_remote_code=True)

    # Load the LoRA adapter weights
    # Replace "Grandediw/lora_model_finetuned" with your actual LoRA adapter repo
    model = PeftModel.from_pretrained(base_model, "Grandediw/lora_model_finetuned")

    # Create a pipeline for text generation
    chat_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=64,   # cap on newly generated tokens (max_length would also count the prompt)
        do_sample=True,      # sampling must be enabled for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    return chat_pipeline


chat_pipeline = load_model()

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if prompt := st.chat_input("Ask me anything:"):
    # Display user message
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Generate response
    with st.spinner("Thinking..."):
        # Generate text with the pipeline
        response = chat_pipeline(prompt)[0]["generated_text"]

        # Remove the prompt from the start if it is echoed in the output
        if response.startswith(prompt):
            response = response[len(prompt):].strip()

    # Display assistant response
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
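
# Usage note (assumption: this script is saved as app.py at the root of a Streamlit
# or Hugging Face Space repo). Install the dependencies and launch it with the
# Streamlit CLI, for example:
#
#   pip install streamlit transformers peft torch
#   streamlit run app.py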