import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Initialize model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("diabolic6045/ELN-Llama-1B-base")
model = AutoModelForCausalLM.from_pretrained("diabolic6045/ELN-Llama-1B-base")
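# A minimal device-placement sketch (an assumption about the runtime: the
# Space may well be CPU-only, in which case this is a no-op):
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)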
def generate_response(message, history, temperature=0.7, max_length=200):
    # With type="messages", history is a list of {"role": ..., "content": ...}
    # dicts; flatten it into a plain-text prompt, since this is a base model
    # without a chat template
    conversation = ""
    for h in history:
        role = "User" if h["role"] == "user" else "Assistant"
        conversation += f"{role}: {h['content']}\n"
    conversation += f"User: {message}\nAssistant:"
    # Tokenize the prompt, truncating long conversations to fit the context
    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=512).to(model.device)
    # Generate a continuation; max_new_tokens bounds only the generated text,
    # whereas max_length would also count the prompt tokens
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=int(max_length),
            temperature=float(temperature),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    # Base models often keep writing the next "User:" turn; cut it off
    response = response.split("User:")[0].strip()
    return response
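# Quick sanity check of the function outside the UI (hypothetical usage;
# uncomment to run a single generation from the command line):
# print(generate_response("Hello, who are you?", history=[]))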
# Create the Gradio interface; the slider values are passed to
# generate_response as extra arguments after (message, history). Retry,
# undo, and clear controls are built into recent ChatInterface versions.
demo = gr.ChatInterface(
    fn=generate_response,
    type="messages",
    title="LLaMA Chatbot",
    description="Chat with the ELN-Llama-1B model. Ask questions or have a conversation!",
    examples=[
        "What is artificial intelligence?",
        "Write a short poem about nature.",
        "Explain quantum computing in simple terms.",
    ],
    # Caching would run every example through the model at startup, which is
    # slow on a CPU Space
    cache_examples=False,
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=50, maximum=500, value=200, step=50, label="Max Length"),
    ],
)
if __name__ == "__main__":
    # share=True builds a temporary public link when run locally; on Hugging
    # Face Spaces the flag is ignored, since the app is already hosted
    demo.launch(share=True)
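# Assumed deployment note: a Space running this file also needs a
# requirements.txt next to it listing at least transformers and torch
# (gradio itself is provided by the Space's SDK setting).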