import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


# Cache the model and tokenizer so Streamlit does not reload them on every rerun
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("namannn/llama2-13b-hyperbolic-cluster-pruned")
    model = AutoModelForCausalLM.from_pretrained("namannn/llama2-13b-hyperbolic-cluster-pruned")
    return tokenizer, model


tokenizer, model = load_model()

# Streamlit UI components
st.title("Text Generation with LLaMa2-13b Hyperbolic Model")
st.write("Enter a prompt below and the model will generate text.")

# User input for prompt
prompt = st.text_area("Input Prompt", "Once upon a time, in a land far away")

# Slider for controlling the length of the output
max_length = st.slider("Max Length of Generated Text", min_value=50, max_value=200, value=100)

# Button to trigger text generation
if st.button("Generate Text"):
    if prompt:
        # Encode the prompt text
        inputs = tokenizer(prompt, return_tensors="pt")

        # Generate text with the model (no gradients needed for inference)
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],  # silences the missing-attention-mask warning
                max_length=max_length,
                num_return_sequences=1,
                no_repeat_ngram_size=2,                   # you can tune this to reduce repetition
                do_sample=True,                           # use sampling for diverse generation
                top_k=50,                                 # top-k sampling for diversity
                top_p=0.95,                               # top-p (nucleus) sampling
                temperature=0.7,                          # control randomness (lower = more deterministic)
                pad_token_id=tokenizer.eos_token_id,      # LLaMA tokenizers have no pad token by default
            )

        # Decode and display the generated text
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.subheader("Generated Text:")
        st.write(generated_text)
    else:
        st.warning("Please enter a prompt to generate text.")
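# --- Usage note (a sketch, not from the original script) ---
# Assuming the file is saved as app.py (the filename is an assumption),
# the app can be launched with:
#
#   streamlit run app.py
#
# A 13B-parameter model needs substantial memory in full precision. If the
# `accelerate` package is installed, a common alternative loader is to pass
# `torch_dtype` and `device_map` to from_pretrained (both are standard
# arguments in recent transformers releases), e.g.:
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "namannn/llama2-13b-hyperbolic-cluster-pruned",
#       torch_dtype=torch.float16,  # roughly halves memory vs. float32
#       device_map="auto",          # spreads weights across available devices
#   )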