Spaces:
Sleeping
Sleeping
import streamlit as st | |
import torch | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
@st.cache_resource
def load_model_and_tokenizer():
    """
    Load the tokenizer and model once and reuse them across Streamlit reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the first call performs
    the load, later calls return the same tokenizer/model objects.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        Exception: any error raised while loading is shown with ``st.error``
        and then re-raised unchanged so the caller can stop the app.
    """
    model_id = "namannn/llama2-13b-hyperbolic-cluster-pruned"
    try:
        # NOTE: trust_remote_code=True lets code shipped in the model
        # repository run locally; only keep it for repositories you trust.
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,  # Use fast tokenizer if available
            trust_remote_code=True,  # Trust remote code for custom tokenizers
        )
        # Ensure pad_token is set; fall back to the EOS token when missing.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,  # Trust remote code for custom models
        )
        return tokenizer, model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        raise
def generate_text(prompt, tokenizer, model, max_length):
    """
    Sample a continuation of ``prompt`` and return only the newly generated text.

    Args:
        prompt: Text to continue.
        tokenizer: Callable tokenizer; also used to decode the output ids.
        model: Model exposing ``device`` and ``generate``.
        max_length: Token budget for the continuation; the prompt's token
            count is added to it before it is passed to ``model.generate``.

    Returns:
        The decoded continuation with surrounding whitespace stripped, or
        ``None`` if any step fails (the error is reported via ``st.error``).
    """
    try:
        # Tokenize, then move the encoded tensors onto the model's device.
        encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_len = len(encoded["input_ids"][0])

        generation_kwargs = dict(
            input_ids=encoded["input_ids"],
            attention_mask=encoded.get("attention_mask"),
            # max_length counts prompt tokens too, so extend it by the prompt.
            max_length=max_length + prompt_len,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Gradients are not needed for generation.
        with torch.no_grad():
            sequences = model.generate(**generation_kwargs)

        # Drop the echoed prompt ids so only the continuation is decoded.
        continuation_ids = sequences[0][prompt_len:]
        decoded = tokenizer.decode(continuation_ids, skip_special_tokens=True)
        return decoded.strip()
    except Exception as e:
        st.error(f"Error generating text: {e}")
        return None
def main():
    """
    Render the Streamlit page: load the model, collect a prompt and a length,
    and display generated text when the button is pressed.

    Returns early (after showing an error) if the model cannot be loaded.
    """
    # Page configuration, title and description.
    st.set_page_config(page_title="LLaMa2 Text Generation", page_icon="✍️")
    st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
    st.write("Enter a prompt below and the model will generate text.")

    # Without a model there is nothing else to render.
    try:
        tokenizer, model = load_model_and_tokenizer()
    except Exception as load_error:
        st.error(f"Failed to load model: {load_error}")
        return

    # Sidebar: where the model lives and which dtype it uses.
    sidebar = st.sidebar
    sidebar.header("System Information")
    sidebar.write(f"Device: {model.device}")
    sidebar.write(f"Model Dtype: {model.dtype}")

    # Inputs: the prompt and the length of the generated output.
    prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")
    max_length = st.slider(
        "Max Length of Generated Text",
        min_value=50,
        max_value=500,
        value=150,
    )

    # Nothing to do until the button is pressed.
    if not st.button("Generate Text"):
        return

    if not prompt:
        st.warning("Please enter a prompt to generate text.")
        return

    try:
        result = generate_text(prompt, tokenizer, model, max_length)
        if not result:
            st.warning("No text was generated. Please check the input and try again.")
        else:
            st.subheader("Generated Text:")
            st.write(result)
    except Exception as gen_error:
        st.error(f"Unexpected error during text generation: {gen_error}")
# Run the app only when this file is executed as the main script
# (e.g. via `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()