import os

# Install pinned dependencies at startup (a common pattern in notebooks/Spaces).
# peft and gradio are added to the list because both are imported below.
os.system("pip install -U bitsandbytes==0.45.3 transformers accelerate torch peft gradio --no-cache-dir")

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from functools import lru_cache

# Define models
BASE_MODEL = "deepseek-ai/deepseek-math-7b-rl"
FINETUNED_MODEL = "LaibaIrfan/emoji_math"

# Load tokenizer and model (cached so repeated calls reuse the same instances)
@lru_cache()
def load_model():
    # Use the base model's tokenizer; the adapter does not change the vocabulary
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float32,
        device_map="cpu"
    )
    # Attach the fine-tuned PEFT adapter on top of the base model
    model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, device_map="cpu")
    model.eval()
    return tokenizer, model

# Load the model once at startup
tokenizer, model = load_model()

# Function to generate the corrected equation
def generate_result(incorrect_math):
    input_text = f"Incorrect: {incorrect_math}\nCorrect:"
    # Keep inputs on the same device as the model; it was loaded on CPU above,
    # so moving them to "cuda" would fail or mismatch devices
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens bounds the generated continuation, independent of prompt length
        output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Gradio Interface
iface = gr.Interface(
    fn=generate_result,
    inputs="text",
    outputs="text",
    title="Emoji Math Solver 🧮",
    description="Enter an emoji-based math equation, and the model will generate the correct answer!"
)

iface.launch(debug=True, share=True, inline=True)
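
# Optional smoke test (a sketch; the example equation below is hypothetical, not
# from the training data): call generate_result() directly to verify the model
# loads and decodes correctly before relying on the UI. Uncomment and place it
# ahead of iface.launch(), since launch() blocks the script.
# print(generate_result("🍎 + 🍎 + 🍎 = 9"))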