import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
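# Assumed dependencies (not pinned in the original): device_map="auto" below
# relies on the `accelerate` package, so the environment roughly needs:
#     pip install streamlit transformers torch accelerate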
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load model and tokenizer with Streamlit's caching to prevent reloading.
    @st.cache_resource ensures the model is loaded only once per server
    process and shared across sessions.
    """
    tokenizer = AutoTokenizer.from_pretrained("namannn/llama2-13b-hyperbolic-cluster-pruned")
    model = AutoModelForCausalLM.from_pretrained(
        "namannn/llama2-13b-hyperbolic-cluster-pruned",
        # Optional: specify device and precision to optimize loading
        device_map="auto",          # Automatically distribute model across available GPUs/CPU
        torch_dtype=torch.float16,  # Use half precision to reduce memory usage
        low_cpu_mem_usage=True      # Optimize memory usage during model loading
    )
    return tokenizer, model
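# Rough sizing note (back-of-the-envelope estimate, not measured, and this
# pruned checkpoint may be smaller): a full 13B-parameter model in float16
# needs about 13e9 * 2 bytes ≈ 26 GB just for weights, so device_map="auto"
# may offload layers to CPU if no single GPU has that much free memory.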
def generate_text(prompt, tokenizer, model, max_length):
    """
    Generate text using the loaded model and tokenizer.
    """
    # Encode the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate text with the model
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_length,           # Cap newly generated tokens, matching the UI label
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id  # LLaMA has no pad token; reuse EOS
    )
    # Decode and return generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text
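# Note: for decoder-only models like LLaMA, generate() returns prompt plus
# continuation, so the decoded string above echoes the prompt. To display only
# the new text, one option (a sketch, not from the original) is:
#     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
#     generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)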
def main():
    # Set page title and icon
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")

    # Page title and description
    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Load model and tokenizer (cached, so this only runs once)
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return

    # User input for prompt
    prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")

    # Slider for controlling the length of the output
    max_length = st.slider("Max Length of Generated Text", min_value=50, max_value=200, value=100)

    # Button to trigger text generation
    if st.button("Generate Text"):
        if prompt:
            try:
                # Generate text
                generated_text = generate_text(prompt, tokenizer, model, max_length)
                # Display generated text
                st.subheader("Generated Text:")
                st.write(generated_text)
            except Exception as e:
                st.error(f"Error generating text: {e}")
        else:
            st.warning("Please enter a prompt to generate text.")

if __name__ == "__main__":
    main()
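# To launch the app locally (assuming this file is saved as app.py):
#     streamlit run app.py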