Spaces:

Grandediw
/

lab2-fine-tune

Sleeping

lab2-fine-tune / app.py

Grandediw

Update

decbb4f 5 months ago

2.04 kB

	import streamlit as st
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

	# Page chrome: the same text is used for the browser tab and the on-page
	# heading. Kept ahead of every other Streamlit call in this script.
	APP_TITLE = "Hugging Face Chatbot"
	st.set_page_config(layout="centered", page_title=APP_TITLE)
	st.title(APP_TITLE)

	@st.cache_resource
	def load_model():
	    """Build the text-generation pipeline used to answer chat prompts.

	    Loads the tokenizer and causal language model published under
	    ``Grandediw/lora_model_finetuned`` and wraps them in a ``transformers``
	    text-generation pipeline. The function is decorated with
	    ``st.cache_resource`` so Streamlit can reuse the returned pipeline
	    instead of reloading the weights.

	    Returns:
	        The configured ``transformers`` text-generation pipeline.
	    """
	    # One name for the checkpoint so the tokenizer and the model cannot
	    # drift apart if the repository id is ever changed.
	    model_id = "Grandediw/lora_model_finetuned"

	    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

	    # NOTE(review): trust_remote_code=True lets Python code shipped in the
	    # model repository run in this process; keep it only while that
	    # repository is trusted.
	    model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        device_map="auto",
	        trust_remote_code=True,
	    )

	    return pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	        # NOTE(review): per the transformers generation docs, max_length
	        # counts prompt tokens as well as generated ones, so long prompts
	        # leave less room for the reply -- consider max_new_tokens.
	        max_length=512,
	        # temperature/top_p are sampling parameters; they only influence
	        # the output when sampling is switched on, so enable it explicitly
	        # rather than relying on the checkpoint's generation config.
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.9,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	# Obtain the generation pipeline from load_model().
	chat_pipeline = load_model()

	# First run of this session: start with an empty conversation log.
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	# Streamlit re-executes the script on every interaction, so replay the
	# stored turns to keep the whole conversation on screen.
	for past_turn in st.session_state.messages:
	    speaker = past_turn["role"]
	    text = past_turn["content"]
	    with st.chat_message(speaker):
	        st.markdown(text)

	# Read the next user message; nothing below runs until one is submitted.
	prompt = st.chat_input("Ask me anything:")
	if prompt:
	    # Echo the user's turn and record it in the conversation log.
	    with st.chat_message("user"):
	        st.markdown(prompt)
	    st.session_state.messages.append({"role": "user", "content": prompt})

	    # Run the model on the latest prompt only, showing a spinner meanwhile.
	    with st.spinner("Thinking..."):
	        outputs = chat_pipeline(prompt)
	    full_text = outputs[0]["generated_text"]

	    # The generated text may begin with an echo of the prompt. When it
	    # does, keep only the continuation (trimmed); otherwise show the text
	    # exactly as the model returned it.
	    response = (
	        full_text[len(prompt):].strip()
	        if full_text.startswith(prompt)
	        else full_text
	    )

	    # Show the assistant's turn and record it in the conversation log.
	    st.chat_message("assistant").markdown(response)
	    st.session_state.messages.append({"role": "assistant", "content": response})