# Repository page header (not part of the program): namannn · "Update app.py" · a992249 (verified) · raw · history/blame · 2.84 kB
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
@st.cache_resource
def load_model_and_tokenizer():
    """
    Build the tokenizer / causal-LM pair used by the app.

    The function is wrapped in ``st.cache_resource`` so that the expensive
    checkpoint load is not repeated every time the script is re-executed.

    Returns:
        tuple: ``(tokenizer, model)`` loaded from the
        ``namannn/llama2-13b-hyperbolic-cluster-pruned`` checkpoint.
    """
    checkpoint = "namannn/llama2-13b-hyperbolic-cluster-pruned"

    # Loading options, kept together so they are easy to audit:
    #   device_map        -> let the library place the weights on the available GPUs/CPU
    #   torch_dtype       -> half precision to cut memory usage
    #   low_cpu_mem_usage -> reduce peak host memory while the weights are loaded
    load_options = {
        "device_map": "auto",
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    }

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)
    return tokenizer, model
def generate_text(prompt, tokenizer, model, max_length):
    """
    Generate a sampled continuation of ``prompt`` with the given model.

    Args:
        prompt: Text to continue.
        tokenizer: Tokenizer matching ``model``; called as
            ``tokenizer(prompt, return_tensors="pt")`` and used to decode
            the result.
        model: Causal language model exposing ``device`` and ``generate``.
        max_length: Value forwarded to ``generate`` as ``max_length``. In
            transformers this limit covers the prompt tokens plus the newly
            generated tokens, not the new tokens alone.

    Returns:
        str: The decoded first returned sequence with special tokens removed.
    """
    # The model is placed by ``device_map="auto"`` and may live on a GPU, so
    # the encoded prompt has to be moved to the same device before generating.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        encoded["input_ids"],
        # Pass the mask explicitly instead of letting generate() guess it
        # from the pad token (which may be unset for this tokenizer).
        attention_mask=encoded.get("attention_mask"),
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )

    # Only one sequence is requested, so decode the first (and only) row.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def main():
    """
    Render the text-generation page and react to the "Generate Text" button.

    Loads the model/tokenizer, shows a prompt box and a length slider, and,
    when the button is pressed, displays either the generated text, a warning
    for an empty prompt, or an error message if generation fails.
    """
    # Page chrome: browser tab title/icon, heading and a short instruction.
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")
    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Without a model there is nothing else to show, so report and stop.
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as load_error:
        st.error(f"Error loading model: {load_error}")
        return

    # Input widgets: the prompt text and the length limit for generation.
    user_prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")
    length_limit = st.slider(
        "Max Length of Generated Text", min_value=50, max_value=200, value=100
    )

    # Nothing more to do until the button is pressed.
    if not st.button("Generate Text"):
        return

    if not user_prompt:
        st.warning("Please enter a prompt to generate text.")
        return

    # Generation and rendering share one handler so any failure surfaces
    # as an error box on the page.
    try:
        completion = generate_text(user_prompt, tokenizer, model, length_limit)
        st.subheader("Generated Text:")
        st.write(completion)
    except Exception as generation_error:
        st.error(f"Error generating text: {generation_error}")
# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()