"""Gradio chat front-end for the Athena-2 0.5B causal language model."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "Spestly/Athena-2-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

# Set to evaluation mode
model.eval()

# Sampling settings passed to ``model.generate`` for every reply.
_GENERATION_KWARGS = {
    "max_new_tokens": 1000,
    "num_return_sequences": 1,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
}


def _build_prompt(message):
    """Return the full instruction prompt wrapping the user's *message*."""
    return (
        "You are an LLM called Athena. Aayan Mishra finetunes you. Anthropic does NOT train you. "
        "You are a Qwen 2.5 fine-tune. Your purpose is the help the user accomplish their request to the best of your abilities. Be welcoming and greeting when talking with the user "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )


def generate_response(message, history):
    """Generate the model's reply to a single chat message.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation, as supplied by the chat
            interface. NOTE: it is accepted for interface compatibility but
            not used -- every message is answered independently.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    inputs = tokenizer(_build_prompt(message), return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(**inputs, **_GENERATION_KWARGS)

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on "### Response:" would throw away the real answer if
    # the model itself emitted that marker somewhere in its output.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


iface = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(
        placeholder="Type your message here...",
        container=False,
        scale=7,
    ),
    title="Athena-2 🏛️ - Beta",
    description="Chat with Athena-2 (Beta) Please note that since Athena-2 is in beta, some outputs may not be accurate/expected!",
    theme="soft",
    examples=[
        "What is Pagani and what are they known for?",
        "Make a small Neural Network using PyTorch.",
        "What is the capital of Canada?",
    ],
    type="messages",
)

iface.launch()