import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
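# Assumed dependencies (not pinned in the original): device_map="auto" below
# relies on the `accelerate` package, so the environment roughly needs:
#     pip install streamlit transformers torch accelerate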
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load model and tokenizer with Streamlit's caching to prevent reloading.
    @st.cache_resource ensures the model is loaded only once per server
    process and shared across sessions.
    """
    tokenizer = AutoTokenizer.from_pretrained("namannn/llama2-13b-hyperbolic-cluster-pruned")
    model = AutoModelForCausalLM.from_pretrained(
        "namannn/llama2-13b-hyperbolic-cluster-pruned",
        # Optional: specify device and precision to optimize loading
        device_map="auto",          # Automatically distribute model across available GPUs/CPU
        torch_dtype=torch.float16,  # Use half precision to reduce memory usage
        low_cpu_mem_usage=True      # Optimize memory usage during model loading
    )
    return tokenizer, model
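# Rough sizing note (back-of-the-envelope estimate, not measured, and this
# pruned checkpoint may be smaller): a full 13B-parameter model in float16
# needs about 13e9 * 2 bytes ≈ 26 GB just for weights, so device_map="auto"
# may offload layers to CPU if no single GPU has that much free memory.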
def generate_text(prompt, tokenizer, model, max_length):
    """
    Generate text using the loaded model and tokenizer.
    """
    # Encode the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate text with the model
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_length,           # Cap newly generated tokens, matching the UI label
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id  # LLaMA has no pad token; reuse EOS
    )
    # Decode and return generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text
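# Note: for decoder-only models like LLaMA, generate() returns prompt plus
# continuation, so the decoded string above echoes the prompt. To display only
# the new text, one option (a sketch, not from the original) is:
#     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
#     generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)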
def main():
    # Set page title and icon
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")

    # Page title and description
    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Load model and tokenizer (cached, so this only runs once)
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return

    # User input for prompt
    prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")

    # Slider for controlling the length of the output
    max_length = st.slider("Max Length of Generated Text", min_value=50, max_value=200, value=100)

    # Button to trigger text generation
    if st.button("Generate Text"):
        if prompt:
            try:
                # Generate text
                generated_text = generate_text(prompt, tokenizer, model, max_length)
                # Display generated text
                st.subheader("Generated Text:")
                st.write(generated_text)
            except Exception as e:
                st.error(f"Error generating text: {e}")
        else:
            st.warning("Please enter a prompt to generate text.")

if __name__ == "__main__":
    main()
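# To launch the app locally (assuming this file is saved as app.py):
#     streamlit run app.py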