Joash2024 committed
Commit 2d708a8 · 1 Parent(s): d2da9d1

feat: load models on demand with better memory management

Files changed (1)
  1. app.py +94 -86
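This commit stops keeping both models resident in GPU memory for the whole session; each request now loads only the selected model, runs generation, and frees the weights afterwards. Below is a minimal sketch of that load/generate/free pattern, using the same model IDs that appear in app.py; the run_once helper name and the max_new_tokens value are illustrative, not part of the commit.

import gc
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
ADAPTER_ID = "Joash2024/Math-SmolLM2-1.7B"

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
tokenizer.pad_token = tokenizer.eos_token

def run_once(prompt: str, use_adapter: bool) -> str:
    # Hypothetical helper: load only the model needed for this request
    model = AutoModelForCausalLM.from_pretrained(
        BASE_ID, device_map="auto", torch_dtype=torch.float16
    )
    if use_adapter:
        model = PeftModel.from_pretrained(model, ADAPTER_ID)
    model.eval()

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs, max_new_tokens=64, pad_token_id=tokenizer.eos_token_id
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Free the weights before the next request
    del model
    gc.collect()
    torch.cuda.empty_cache()
    return text[len(prompt):].strip()

The trade-off is idle memory versus per-request latency: nothing stays loaded between calls, but every call pays the cost of reloading the ~1.7B model from the local cache onto the GPU.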
app.py CHANGED
@@ -5,30 +5,24 @@ from peft import PeftModel
 from monitoring import PerformanceMonitor, measure_time
 
 # Model configurations
-BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # Base model
-ADAPTER_MODEL = "Joash2024/Math-SmolLM2-1.7B"  # Our LoRA adapter
+MODEL_OPTIONS = {
+    "Base Model": {
+        "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+        "is_base": True
+    },
+    "Fine-tuned Model": {
+        "id": "Joash2024/Math-SmolLM2-1.7B",
+        "is_base": False
+    }
+}
 
 # Initialize performance monitor
 monitor = PerformanceMonitor()
 
 print("Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
 tokenizer.pad_token = tokenizer.eos_token
 
-print("Loading base model...")
-base_model = AutoModelForCausalLM.from_pretrained(
-    BASE_MODEL,
-    device_map="auto",
-    torch_dtype=torch.float16
-)
-
-print("Loading fine-tuned model...")
-finetuned_model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
-
-# Set models to eval mode
-base_model.eval()
-finetuned_model.eval()
-
 def format_prompt(problem: str, problem_type: str) -> str:
     """Format input prompt for the model"""
     if problem_type == "Derivative":
@@ -48,63 +42,80 @@ Function: {problem}
 The derivative is:"""
 
 @measure_time
-def get_model_response(problem: str, problem_type: str, model) -> str:
-    """Generate response from a specific model"""
-    # Format prompt
-    prompt = format_prompt(problem, problem_type)
-
-    # Tokenize
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    # Generate
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_length=100,
-            num_return_sequences=1,
-            temperature=0.1,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-    # Decode and extract response
-    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    response = generated[len(prompt):].strip()
-
-    return response
+def get_model_response(problem: str, problem_type: str, model_info) -> str:
+    """Get response from a specific model"""
+    try:
+        # Load model
+        if model_info["is_base"]:
+            print(f"Loading {model_info['id']}...")
+            model = AutoModelForCausalLM.from_pretrained(
+                model_info["id"],
+                device_map="auto",
+                torch_dtype=torch.float16
+            )
+        else:
+            print("Loading base model for fine-tuned...")
+            base = AutoModelForCausalLM.from_pretrained(
+                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+                device_map="auto",
+                torch_dtype=torch.float16
+            )
+            print(f"Loading {model_info['id']}...")
+            model = PeftModel.from_pretrained(base, model_info["id"])
+
+        model.eval()
+
+        # Format prompt and generate
+        prompt = format_prompt(problem, problem_type)
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_length=100,
+                num_return_sequences=1,
+                temperature=0.1,
+                do_sample=True,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        # Decode and extract response
+        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = generated[len(prompt):].strip()
+
+        # Clean up
+        del model
+        if not model_info["is_base"]:
+            del base
+        torch.cuda.empty_cache()
+
+        return response
+    except Exception as e:
+        return f"Error: {str(e)}"
 
-def solve_problem(problem: str, problem_type: str) -> tuple:
-    """Solve a math problem using both models"""
+def solve_problem(problem: str, problem_type: str, model_type: str) -> tuple:
+    """Solve a math problem using selected model"""
     if not problem:
-        return "Please enter a problem", "Please enter a problem", None
+        return "Please enter a problem", None
 
     # Record problem type
     monitor.record_problem_type(problem_type)
 
-    # Get responses from both models with timing
-    base_response, base_time = get_model_response(problem, problem_type, base_model)
-    finetuned_response, finetuned_time = get_model_response(problem, problem_type, finetuned_model)
+    # Get response from selected model
+    model_info = MODEL_OPTIONS[model_type]
+    response, time_taken = get_model_response(problem, problem_type, model_info)
 
-    # Format responses with steps
-    base_output = f"""Solution: {base_response}
-
-Let's verify this step by step:
-1. Starting with f(x) = {problem}
-2. Applying differentiation rules
-3. We get f'(x) = {base_response}"""
-
-    finetuned_output = f"""Solution: {finetuned_response}
+    # Format response with steps
+    output = f"""Solution: {response}
 
 Let's verify this step by step:
 1. Starting with f(x) = {problem}
 2. Applying differentiation rules
-3. We get f'(x) = {finetuned_response}"""
+3. We get f'(x) = {response}"""
 
     # Record metrics
-    monitor.record_response_time("base", base_time)
-    monitor.record_response_time("finetuned", finetuned_time)
-    monitor.record_success("base", not base_response.startswith("Error"))
-    monitor.record_success("finetuned", not finetuned_response.startswith("Error"))
+    monitor.record_response_time(model_type, time_taken)
+    monitor.record_success(model_type, not response.startswith("Error"))
 
     # Get updated statistics
     stats = monitor.get_statistics()
@@ -114,24 +125,22 @@ Let's verify this step by step:
 ### Performance Metrics
 
 #### Response Times (seconds)
-- Base Model: {stats.get('base_avg_response_time', 0):.2f} avg
-- Fine-tuned Model: {stats.get('finetuned_avg_response_time', 0):.2f} avg
+- {model_type}: {stats.get(f'{model_type}_avg_response_time', 0):.2f} avg
 
 #### Success Rates
-- Base Model: {stats.get('base_success_rate', 0):.1f}%
-- Fine-tuned Model: {stats.get('finetuned_success_rate', 0):.1f}%
+- {model_type}: {stats.get(f'{model_type}_success_rate', 0):.1f}%
 
 #### Problem Types Used
 """
     for ptype, percentage in stats.get('problem_type_distribution', {}).items():
         stats_display += f"- {ptype}: {percentage:.1f}%\n"
 
-    return base_output, finetuned_output, stats_display
+    return output, stats_display
 
 # Create Gradio interface
 with gr.Blocks(title="Mathematics Problem Solver") as demo:
     gr.Markdown("# Mathematics Problem Solver")
-    gr.Markdown("Compare solutions between base and fine-tuned models")
+    gr.Markdown("Test our models on mathematical problems")
 
     with gr.Row():
         with gr.Column():
@@ -140,6 +149,11 @@ with gr.Blocks(title="Mathematics Problem Solver") as demo:
                 value="Derivative",
                 label="Problem Type"
             )
+            model_type = gr.Dropdown(
+                choices=list(MODEL_OPTIONS.keys()),
+                value="Fine-tuned Model",
+                label="Model to Use"
+            )
             problem_input = gr.Textbox(
                 label="Enter your math problem",
                 placeholder="Example: x^2 + 3x"
@@ -147,13 +161,7 @@ with gr.Blocks(title="Mathematics Problem Solver") as demo:
             solve_btn = gr.Button("Solve", variant="primary")
 
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Base Model")
-            base_output = gr.Textbox(label="Base Model Solution", lines=5)
-
-        with gr.Column():
-            gr.Markdown("### Fine-tuned Model")
-            finetuned_output = gr.Textbox(label="Fine-tuned Model Solution", lines=5)
+        solution_output = gr.Textbox(label="Solution", lines=5)
 
     # Performance metrics display
     with gr.Row():
@@ -162,17 +170,17 @@ with gr.Blocks(title="Mathematics Problem Solver") as demo:
     # Example problems
     gr.Examples(
         examples=[
-            ["x^2 + 3x", "Derivative"],
-            ["144", "Root Finding"],
-            ["235 + 567", "Addition"],
-            ["\\sin{\\left(x\\right)}", "Derivative"],
-            ["e^x", "Derivative"],
-            ["\\frac{1}{x}", "Derivative"],
-            ["x^3 + 2x", "Derivative"],
-            ["\\cos{\\left(x^2\\right)}", "Derivative"]
+            ["x^2 + 3x", "Derivative", "Fine-tuned Model"],
+            ["144", "Root Finding", "Fine-tuned Model"],
+            ["235 + 567", "Addition", "Fine-tuned Model"],
+            ["\\sin{\\left(x\\right)}", "Derivative", "Fine-tuned Model"],
+            ["e^x", "Derivative", "Fine-tuned Model"],
+            ["\\frac{1}{x}", "Derivative", "Fine-tuned Model"],
+            ["x^3 + 2x", "Derivative", "Fine-tuned Model"],
+            ["\\cos{\\left(x^2\\right)}", "Derivative", "Fine-tuned Model"]
         ],
-        inputs=[problem_input, problem_type],
-        outputs=[base_output, finetuned_output, metrics_display],
+        inputs=[problem_input, problem_type, model_type],
+        outputs=[solution_output, metrics_display],
         fn=solve_problem,
         cache_examples=True,
     )
@@ -180,8 +188,8 @@ with gr.Blocks(title="Mathematics Problem Solver") as demo:
     # Connect the interface
     solve_btn.click(
         fn=solve_problem,
-        inputs=[problem_input, problem_type],
-        outputs=[base_output, finetuned_output, metrics_display]
+        inputs=[problem_input, problem_type, model_type],
+        outputs=[solution_output, metrics_display]
     )
 
 if __name__ == "__main__":