import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Lightweight, CPU-friendly model
model_name = "microsoft/phi-1_5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.generation_config = GenerationConfig.from_pretrained(model_name)
# phi-1_5 defines no pad token, so reuse EOS to silence generate() warnings
model.generation_config.pad_token_id = model.generation_config.eos_token_id

def solve_math_problem(question):
    # phi-1_5 is a base completion model with no chat template, so build the
    # "Question: ... Answer:" prompt its model card recommends instead of
    # calling tokenizer.apply_chat_template (which would fail here)
    prompt = f"Question: {question}\nAnswer:"
    input_tensor = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        outputs = model.generate(input_tensor, max_new_tokens=150)
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
    return response.strip()

with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🧙‍♂️ Math Wizard AI")
    gr.Markdown("""
This assistant is powered by a lightweight AI model that runs smoothly even on CPUs.
Built with ❤️ using Gradio + HuggingFace Transformers
Model: `microsoft/phi-1_5`, optimized for reasoning with a small footprint.
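""")

    # Assumed completion of the truncated snippet: the component names below
    # (question, solve_btn, answer) are illustrative, not from the original.
    # A question box, a solve button, and an answer panel bound to the solver.
    question = gr.Textbox(label="Your math question", placeholder="e.g. What is 15% of 240?")
    solve_btn = gr.Button("Solve")
    answer = gr.Textbox(label="Answer", interactive=False)
    solve_btn.click(fn=solve_math_problem, inputs=question, outputs=answer)

demo.launch()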