Spaces:
Sleeping
Sleeping
import streamlit as st | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
# Page chrome: set the browser-tab title and a centered layout, then render
# the visible heading at the top of the app.
st.set_page_config(layout="centered", page_title="Hugging Face Chatbot")
st.title("Hugging Face Chatbot")
@st.cache_resource
def load_model():
    """Build the text-generation pipeline used by the chat UI.

    Streamlit re-executes this script on every user interaction, so the
    function is wrapped in ``st.cache_resource``: the tokenizer and model are
    loaded once and the same pipeline object is reused on later reruns.

    Returns:
        A ``transformers`` "text-generation" pipeline wrapping the fine-tuned
        model and its tokenizer, configured with the generation settings
        below.
    """
    model_id = "Grandediw/lora_model_finetuned"

    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to run locally -- confirm the repository is trusted.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        trust_remote_code=True,
    )

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # max_length bounds prompt tokens plus generated tokens together.
        max_length=512,
        # temperature/top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # Reuse EOS as the padding token id during generation.
        pad_token_id=tokenizer.eos_token_id,
    )
chat_pipeline = load_model()

# Start the session with an empty transcript the first time the script runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored transcript so earlier turns stay visible after a rerun.
for entry in st.session_state["messages"]:
    bubble = st.chat_message(entry["role"])
    bubble.markdown(entry["content"])
# Handle a newly submitted user message, if there is one.
if prompt := st.chat_input("Ask me anything:"):
    # Show the user's message and record it so it is replayed on later reruns.
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    with st.spinner("Thinking..."):
        # return_full_text=False makes the pipeline return only the newly
        # generated text. Stripping the prompt by string prefix is fragile:
        # if the decoded output does not reproduce the prompt exactly, the
        # prompt would be echoed back inside the assistant message.
        outputs = chat_pipeline(prompt, return_full_text=False)
        response = outputs[0]["generated_text"].strip()

    # Show the assistant reply and record it in the transcript.
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})