import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub identifier shared by the tokenizer and the model weights.
MODEL_NAME = "namannn/llama2-13b-hyperbolic-cluster-pruned"


@st.cache_resource
def load_model_and_tokenizer():
    """Load the tokenizer and causal-LM weights for ``MODEL_NAME``.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned objects and reuses them on script reruns instead of loading the
    model again. The cached objects are shared by every session of the app
    process, not held per session.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",  # let Accelerate place weights on the available GPUs/CPU
        torch_dtype=torch.float16,  # half precision to reduce memory usage
        low_cpu_mem_usage=True,  # avoid a full extra copy of the weights while loading
    )
    return tokenizer, model


def generate_text(prompt, tokenizer, model, max_length):
    """Generate a sampled continuation of ``prompt``.

    Args:
        prompt: Text the model should continue.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Causal language model exposing ``generate`` and ``device``.
        max_length: Value forwarded as ``max_length`` to ``model.generate``.
            In the Transformers API this bounds the total sequence length,
            i.e. prompt tokens plus newly generated tokens.

    Returns:
        The decoded first returned sequence with special tokens removed.
    """
    # The model is loaded with device_map="auto", so it may live on a GPU
    # while the tokenizer always produces CPU tensors; move the inputs to the
    # model's device before generating.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Tokenizers without a dedicated padding token (LLaMA-style) report None;
    # fall back to EOS so generate() does not have to guess and warn.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly instead of letting generate() infer it.
        attention_mask=inputs.get("attention_mask"),
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=pad_token_id,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def main():
    """Render the Streamlit page and run generation when the button is pressed."""
    # Set page title and icon
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")

    # Page title and description
    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Load model and tokenizer (cached by Streamlit after the first call).
    # Broad catch is deliberate: this is the UI boundary, and any failure is
    # reported on the page instead of crashing the script.
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return

    # User input for prompt
    prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")

    # Slider for controlling the length of the output
    max_length = st.slider(
        "Max Length of Generated Text",
        min_value=50,
        max_value=200,
        value=100,
    )

    # Button to trigger text generation
    if not st.button("Generate Text"):
        return

    # Treat a whitespace-only prompt the same as an empty one.
    if not prompt or not prompt.strip():
        st.warning("Please enter a prompt to generate text.")
        return

    try:
        generated_text = generate_text(prompt, tokenizer, model, max_length)
    except Exception as e:
        st.error(f"Error generating text: {e}")
    else:
        # Display generated text
        st.subheader("Generated Text:")
        st.write(generated_text)


if __name__ == "__main__":
    main()