import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from monitoring import PerformanceMonitor, measure_time
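# NOTE: the local `monitoring` module is not shown in this file. From the way it
# is used below, it is assumed to provide a PerformanceMonitor with
# record_problem_type(), record_response_time(), record_success() and
# get_statistics(), plus a measure_time decorator that makes the wrapped
# function return (result, elapsed_seconds).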
# Model configurations
BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct" # Base model
ADAPTER_MODEL = "Joash2024/Math-SmolLM2-1.7B" # Our LoRA adapter
# Initialize performance monitor
monitor = PerformanceMonitor()
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token  # use EOS as the pad token during generation
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",         # place layers automatically on the available device(s)
    torch_dtype=torch.float16  # load weights in fp16 to reduce memory use
)
print("Loading fine-tuned model...")
finetuned_model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)  # attach the LoRA adapter
# Set models to eval mode
base_model.eval()
finetuned_model.eval()
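# NOTE (assumption): PeftModel.from_pretrained wraps `base_model` in place, so the
# LoRA layers live inside the same module tree that `base_model` still points to.
# If a strictly adapter-free baseline is wanted, one option is to run the
# base-model generation with the adapter disabled, e.g.:
#
#     with finetuned_model.disable_adapter():
#         base_response, base_time = get_model_response(problem, problem_type, base_model)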

def format_prompt(problem: str, problem_type: str) -> str:
    """Format the input prompt for the selected problem type"""
    if problem_type == "Derivative":
        return f"""Given a mathematical function, find its derivative.
Function: {problem}
The derivative of this function is:"""
    elif problem_type == "Addition":
        return f"""Solve this addition problem.
Problem: {problem}
The solution is:"""
    elif problem_type == "Root Finding":
        return f"""Find the square root of this number.
Number: {problem}
The square root is:"""
    else:  # Custom problems fall back to a generic instruction
        return f"""Solve this math problem.
Problem: {problem}
The solution is:"""

@measure_time
def get_model_response(problem: str, problem_type: str, model) -> str:
    """Generate a response from a specific model"""
    # Format prompt
    prompt = format_prompt(problem, problem_type)

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,  # cap newly generated tokens rather than total length
            num_return_sequences=1,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode and strip the prompt to keep only the newly generated text
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = generated[len(prompt):].strip()
    return response

def solve_problem(problem: str, problem_type: str) -> tuple:
    """Solve a math problem with both models and report performance metrics"""
    if not problem:
        return "Please enter a problem", "Please enter a problem", None

    # Record problem type
    monitor.record_problem_type(problem_type)

    # Get responses from both models with timing
    base_response, base_time = get_model_response(problem, problem_type, base_model)
    finetuned_response, finetuned_time = get_model_response(problem, problem_type, finetuned_model)

    # Format responses with steps
    base_output = f"""Solution: {base_response}
Let's verify this step by step:
1. Starting with the problem: {problem}
2. Applying the relevant rules
3. We get: {base_response}"""

    finetuned_output = f"""Solution: {finetuned_response}
Let's verify this step by step:
1. Starting with the problem: {problem}
2. Applying the relevant rules
3. We get: {finetuned_response}"""

    # Record metrics
    monitor.record_response_time("base", base_time)
    monitor.record_response_time("finetuned", finetuned_time)
    monitor.record_success("base", not base_response.startswith("Error"))
    monitor.record_success("finetuned", not finetuned_response.startswith("Error"))

    # Get updated statistics
    stats = monitor.get_statistics()

    # Format statistics for display
    stats_display = f"""
### Performance Metrics

#### Response Times (seconds)
- Base Model: {stats.get('base_avg_response_time', 0):.2f} avg
- Fine-tuned Model: {stats.get('finetuned_avg_response_time', 0):.2f} avg

#### Success Rates
- Base Model: {stats.get('base_success_rate', 0):.1f}%
- Fine-tuned Model: {stats.get('finetuned_success_rate', 0):.1f}%

#### Problem Types Used
"""
    for ptype, percentage in stats.get('problem_type_distribution', {}).items():
        stats_display += f"- {ptype}: {percentage:.1f}%\n"

    return base_output, finetuned_output, stats_display

# Create Gradio interface
with gr.Blocks(title="Mathematics Problem Solver") as demo:
    gr.Markdown("# Mathematics Problem Solver")
    gr.Markdown("Compare solutions between base and fine-tuned models")

    with gr.Row():
        with gr.Column():
            problem_type = gr.Dropdown(
                choices=["Addition", "Root Finding", "Derivative", "Custom"],
                value="Derivative",
                label="Problem Type"
            )
            problem_input = gr.Textbox(
                label="Enter your math problem",
                placeholder="Example: x^2 + 3x"
            )
            solve_btn = gr.Button("Solve", variant="primary")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Base Model")
            base_output = gr.Textbox(label="Base Model Solution", lines=5)
        with gr.Column():
            gr.Markdown("### Fine-tuned Model")
            finetuned_output = gr.Textbox(label="Fine-tuned Model Solution", lines=5)

    # Performance metrics display
    with gr.Row():
        metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")

    # Example problems
    gr.Examples(
        examples=[
            ["x^2 + 3x", "Derivative"],
            ["144", "Root Finding"],
            ["235 + 567", "Addition"],
            ["\\sin{\\left(x\\right)}", "Derivative"],
            ["e^x", "Derivative"],
            ["\\frac{1}{x}", "Derivative"],
            ["x^3 + 2x", "Derivative"],
            ["\\cos{\\left(x^2\\right)}", "Derivative"]
        ],
        inputs=[problem_input, problem_type],
        outputs=[base_output, finetuned_output, metrics_display],
        fn=solve_problem,
        cache_examples=True,
    )

    # Connect the interface
    solve_btn.click(
        fn=solve_problem,
        inputs=[problem_input, problem_type],
        outputs=[base_output, finetuned_output, metrics_display]
    )

if __name__ == "__main__":
    demo.launch()