import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Authenticate with the Hugging Face Hub using a token from the environment.
HF_TOKEN = os.getenv("HF_TOKEN")
login(token=HF_TOKEN)

# Load the tokenizer and model once at startup (CPU-friendly settings).
model_name = "Spestly/Atlas-Pro-1.5B-Preview"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.eval()


def generate_response(message, history):
    # Wrap the user's message in an Alpaca-style instruction prompt.
    instruction = (
        "You are an LLM called Atlas. You are finetuned by Aayan Mishra. You are NOT trained by Anthropic. "
        "You are a Qwen 2.5 fine-tune. Your purpose is to help the user accomplish their request to the best of your abilities. "
        "Below is an instruction that describes a task. Answer it clearly and concisely.\n\n"
        f"### Instruction:\n{message}\n\n### Response:"
    )
    inputs = tokenizer(instruction, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            num_return_sequences=1,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
    # Decode the full sequence, then keep only the text after the response marker.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Response:")[-1].strip()
    return response


iface = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(height=600, type="messages"),
    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
    title="🦁 Atlas-Pro",
    description="Chat with Atlas-Pro",
    theme="citrus",
    examples=[
        "Can you give me a good salsa recipe?",
        "Write an engaging two-line horror story.",
        "What is the capital of Australia?",
    ],
    type="messages",
)

iface.launch()