import streamlit as st
from transformers import pipeline
import torch
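
# Dependencies (assumed): streamlit, transformers, torch, and accelerate;
# device_map="auto" below relies on the accelerate package being installed.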


class LlamaDemo:
    def __init__(self):
        self.model_name = "meta-llama/Llama-2-70b-chat-hf"
        # Defer pipeline construction until first use (lazy loading)
        self._pipe = None

    @property
    def pipe(self):
        if self._pipe is None:
            self._pipe = pipeline(
                "text-generation",
                model=self.model_name,
                torch_dtype=torch.float16,
                device_map="auto",
            )
        return self._pipe
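
    # Note: device_map="auto" lets Accelerate place/shard the weights across
    # the available devices; float16 halves memory versus float32, but a 70B
    # checkpoint still needs roughly 140 GB for the weights alone in fp16.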

    def generate_response(self, prompt: str, max_new_tokens: int = 512) -> str:
        # Wrap the prompt in Llama 2's [INST] chat tags
        formatted_prompt = f"[INST] {prompt} [/INST]"
        # Generate a completion with the (lazily created) pipeline
        response = self.pipe(
            formatted_prompt,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        )[0]["generated_text"]
        # Keep only the text after the closing instruction tag
        return response.split("[/INST]")[-1].strip()
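
    # Standalone usage sketch (hypothetical, outside Streamlit):
    #
    #     demo = LlamaDemo()
    #     print(demo.generate_response("Explain top-p sampling in one line."))
    #
    # The first call triggers the full download and load, so expect a long
    # delay and substantial GPU memory use for a 70B checkpoint.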


def main():
    st.set_page_config(
        page_title="Llama 2 Chat Demo",
        page_icon="🦙",
        layout="wide",
    )
    st.title("🦙 Llama 2 Chat Demo")

    # Create the model wrapper once per session (the heavy load happens
    # lazily on the first generation request)
    if "llama" not in st.session_state:
        with st.spinner("Loading Llama 2... This might take a few minutes..."):
            st.session_state.llama = LlamaDemo()

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Replay the conversation so far
    with st.container():
        for message in st.session_state.chat_history:
            with st.chat_message(message["role"]):
                st.write(message["content"])

    if prompt := st.chat_input("What would you like to discuss?"):
        st.session_state.chat_history.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    response = st.session_state.llama.generate_response(prompt)
                    st.write(response)
                    st.session_state.chat_history.append(
                        {"role": "assistant", "content": response}
                    )
                except Exception as e:
                    st.error(f"Error: {e}")

    with st.sidebar:
        st.markdown("""
        ### About
        This demo uses Llama-2-70b-chat, a large language model from Meta.
        The model is loaded with automatic device mapping and float16 (half)
        precision to reduce memory use.
        """)

        if st.button("Clear Chat History"):
            st.session_state.chat_history = []
            st.rerun()


if __name__ == "__main__":
    main()
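
# Running this app (assuming the file is saved as app.py):
#
#     streamlit run app.py
#
# Note that the meta-llama checkpoints on the Hugging Face Hub are gated, so
# the first run needs an authenticated session with an approved access
# request. A minimal sketch using huggingface_hub:
#
#     from huggingface_hub import login
#     login()  # prompts for a token; `huggingface-cli login` also works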