# app.py -- last change: "Update app.py" by diabolic6045 (commit 01d8454, verified)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the tokenizer and the causal-LM weights once, at module import, from a
# single checkpoint identifier so the two can never point at different repos.
MODEL_ID = "diabolic6045/ELN-Llama-1B-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
def generate_response(message, temperature, max_length):
    """Stream a sampled completion of ``message``, one token at a time.

    This is a generator: it first yields ``message`` unchanged, then yields
    ``message`` plus everything generated so far after each new token, so a
    streaming UI can redraw its output progressively.

    Args:
        message: Prompt text. It is tokenized with truncation at 512 tokens.
        temperature: Divisor applied to the logits before sampling. Values
            ``<= 0`` select the most likely token instead of sampling
            (avoids a division by zero).
        max_length: Upper bound on prompt tokens + generated tokens. Coerced
            to ``int`` because UI sliders may deliver floats.

    Yields:
        The prompt followed by the text generated so far.
    """
    # range() below requires an integer; a slider value may arrive as float.
    max_length = int(max_length)

    # Tokenize input
    inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
    prompt_ids = inputs["input_ids"]

    # Always emit at least one value: if the prompt already uses up the whole
    # token budget the loop below never runs, and a generator that yields
    # nothing would leave the output box empty.
    yield message

    generated_ids = []
    step_input = prompt_ids      # first pass feeds the whole prompt
    past_key_values = None       # attention cache reused across steps

    # Generate response token by token
    for _ in range(max_length - prompt_ids.shape[1]):
        with torch.no_grad():
            # With the cache, only the newest token has to be run through the
            # model on each step instead of the full sequence.
            outputs = model(
                step_input,
                past_key_values=past_key_values,
                use_cache=True,
            )
            past_key_values = outputs.past_key_values
            next_token_logits = outputs.logits[:, -1, :]

            if temperature > 0:
                # Apply temperature, then sample from the distribution
                probs = torch.softmax(next_token_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                # Greedy fallback for a non-positive temperature.
                next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

        # .item() assumes a single sequence in the batch, as produced above.
        token_id = next_token.item()

        # Stop if we generate an EOS token
        if token_id == tokenizer.eos_token_id:
            break

        generated_ids.append(token_id)
        step_input = next_token

        # Decode all generated ids together rather than token by token:
        # decoding tokens in isolation can garble characters or spacing that
        # span more than one token.
        completion = tokenizer.decode(generated_ids, skip_special_tokens=True)
        yield message + completion
# Assemble the Gradio interface. Components are created in the same order as
# they appear in the UI (three inputs, then the output box) and the long text
# blocks are named separately so the Interface call itself stays short.
_prompt_input = gr.Textbox(
    label="Input Text",
    lines=4,
    placeholder="Enter your text here and the model will complete it...",
)
_temperature_input = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.7,
    label="Temperature (higher = more creative, lower = more focused)",
)
_max_length_input = gr.Slider(
    minimum=50,
    maximum=500,
    value=200,
    step=50,
    label="Max Length (longer text = more completion)",
)
_completion_output = gr.Textbox(label="Generated Completion", lines=4)

_DESCRIPTION = """
> This project implements an Evolution Learning Network (ELN) to fine-tune transformer-based models like LLaMA using a combination of Quantized Low-Rank Adaptation (QLoRA) and Genetic Algorithms (GA). The primary objective is to evolve a population of models across multiple generations to optimize for performance (fitness) and specialization, while maintaining diversity.
This is a demo of [`diabolic6045/ELN-Llama-1B-base`](https://huggingface.co/diabolic6045/ELN-Llama-1B-base).
"""

_ARTICLE = """
## Tips for better completions:
- Start with a clear and detailed prompt
- Adjust temperature: Higher for creative writing, lower for factual completion
- Adjust max length based on how much text you want to generate
"""

# Each example row is (prompt, temperature, max length), matching the inputs.
_EXAMPLES = [
    ["Once upon a time in a magical forest", 0.7, 50],
    ["The recipe for making the perfect chocolate cake requires", 0.7, 50],
    ["In the year 2150, humanity had finally achieved", 0.7, 50],
    ["The most important principles of effective programming are", 0.8, 50],
]

demo = gr.Interface(
    fn=generate_response,
    inputs=[_prompt_input, _temperature_input, _max_length_input],
    outputs=_completion_output,
    title="Llama 3.2 1B Finetuned With Evolution Learning Network (ELN) Text Completion Demo",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    article=_ARTICLE,
)
def main():
    """Start the Gradio app, requesting a shareable public link."""
    demo.launch(share=True)


# Only launch when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()