import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Initialize the tokenizer and model from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained("diabolic6045/ELN-Llama-1B-base")
model = AutoModelForCausalLM.from_pretrained("diabolic6045/ELN-Llama-1B-base")
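
# Optional: a minimal sketch of GPU placement, assuming a CUDA device is
# available (not part of the original app, which runs on CPU; the fp16 choice
# is an assumption, and the inputs in the loop below would also need
# .to(model.device) before the forward pass):
#
#     if torch.cuda.is_available():
#         model = model.to("cuda", dtype=torch.float16)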

def generate_response(message, temperature, max_length):
    # Tokenize the prompt (truncated to a 512-token input budget)
    inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
    input_ids = inputs["input_ids"]
    current_text = message

    # Generate the completion one token at a time, streaming partial text
    for _ in range(max_length - input_ids.shape[1]):
        with torch.no_grad():
            outputs = model(input_ids)
            next_token_logits = outputs.logits[:, -1, :]

        # Apply temperature scaling to the logits
        next_token_logits = next_token_logits / temperature

        # Sample the next token from the softmax distribution
        probs = torch.softmax(next_token_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)

        # Stop if the model emits an end-of-sequence token
        if next_token.item() == tokenizer.eos_token_id:
            break

        # Append the new token to the running context
        input_ids = torch.cat([input_ids, next_token], dim=-1)

        # Decode only the new token and stream the updated text
        new_token_text = tokenizer.decode(next_token[0], skip_special_tokens=True)
        current_text += new_token_text
        yield current_text
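
# Note: the loop above re-runs the full forward pass over the whole sequence at
# every step. A sketch of the same sampling with the model's key/value cache,
# which Llama models expose via use_cache (variable names here are
# illustrative, not from the original app):
#
#     past_key_values = None
#     next_input = input_ids
#     for _ in range(max_length - input_ids.shape[1]):
#         with torch.no_grad():
#             out = model(next_input, past_key_values=past_key_values, use_cache=True)
#         past_key_values = out.past_key_values
#         # ...sample next_token from out.logits[:, -1, :] as above...
#         next_input = next_token  # feed only the new token on later steps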

# Create the Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Input Text", lines=4, placeholder="Enter your text here and the model will complete it..."),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature (higher = more creative, lower = more focused)"),
        gr.Slider(minimum=50, maximum=500, value=200, step=50, label="Max Length (total tokens, including the prompt)"),
    ],
    outputs=gr.Textbox(label="Generated Completion", lines=4),
    title="Llama 3.2 1B Fine-tuned with an Evolution Learning Network (ELN): Text Completion Demo",
    description="""
    > This project implements an Evolution Learning Network (ELN) to fine-tune transformer-based models such as LLaMA using a combination of Quantized Low-Rank Adaptation (QLoRA) and Genetic Algorithms (GA). The primary objective is to evolve a population of models across multiple generations, optimizing for performance (fitness) and specialization while maintaining diversity.

    This is a demo of [`diabolic6045/ELN-Llama-1B-base`](https://huggingface.co/diabolic6045/ELN-Llama-1B-base).
    """,
    examples=[
        ["Once upon a time in a magical forest", 0.7, 50],
        ["The recipe for making the perfect chocolate cake requires", 0.7, 50],
        ["In the year 2150, humanity had finally achieved", 0.7, 50],
        ["The most important principles of effective programming are", 0.8, 50],
    ],
    article="""
    ## Tips for better completions:
    - Start with a clear, detailed prompt
    - Adjust temperature: higher for creative writing, lower for factual completions
    - Adjust max length based on how much text you want generated
    """,
)
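
# On Gradio 3.x, streaming generator outputs require enabling the queue first;
# Gradio 4.x enables it by default (version assumption, check your installed
# gradio version):
#
#     demo.queue()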

if __name__ == "__main__":
    demo.launch(share=True)