mgbam committed
Commit bebdc57 · verified · 1 parent: 959fea7

Update app.py

Files changed (1)
  1. app.py +299 -217
app.py CHANGED
@@ -1,304 +1,386 @@
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
 
4
 
5
- # Initialize core components first (important for loading API keys etc.)
6
- # This needs to happen before other core modules try to use the status
7
- from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
8
- initialize_clients() # Explicitly initialize
9
 
10
  from core.generation_engine import generate_initial_solutions
11
- from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
12
  from core.evolution_engine import evolve_solution
13
- # from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
 
14
 
15
- # --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
16
- AVAILABLE_MODELS = {}
17
- DEFAULT_MODEL_KEY = None
18
 
 
19
  if GEMINI_API_CONFIGURED:
20
- AVAILABLE_MODELS.update({
21
  "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
22
  "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
23
  })
24
- DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
25
 
 
26
  if HF_API_CONFIGURED:
27
- AVAILABLE_MODELS.update({
28
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
29
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
 
30
  })
31
- if not DEFAULT_MODEL_KEY:
32
- DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
33
-
34
- if not AVAILABLE_MODELS:
35
- AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
36
- DEFAULT_MODEL_KEY = "No Models Available"
37
- elif not DEFAULT_MODEL_KEY:
38
- DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
39
-
40
-
41
- # --- Main Orchestration Logic ---
42
- def run_algoforge_simulation(
43
- problem_type, problem_description, initial_hints, user_tests_string, # New input: user_tests_string
44
- num_initial_solutions, selected_model_key,
45
- gen_temp, gen_max_tokens,
46
- eval_temp, eval_max_tokens,
47
- evolve_temp, evolve_max_tokens,
48
- progress=gr.Progress(track_tqdm=True) # Gradio progress bar
49
  ):
50
  progress(0, desc="Initializing AlgoForge Prime™...")
51
- log_entries = [f"**AlgoForge Prime™ Cycle Starting...**"]
52
-
53
- if not problem_description:
54
- return "ERROR: Problem Description is mandatory.", "", "", "", ""
55
-
56
- model_config = AVAILABLE_MODELS.get(selected_model_key)
57
- if not model_config or model_config["type"] == "none":
58
- return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
59
-
60
- log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
61
- log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
62
 
63
  # --- STAGE 1: GENESIS ---
64
- progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
65
- log_entries.append("\n**Stage 1: Genesis Engine**")
66
 
67
- llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
68
- initial_solution_texts = generate_initial_solutions(
69
- problem_description, initial_hints, problem_type,
70
- num_initial_solutions, llm_gen_config
71
  )
72
- log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
73
- for i, sol_text in enumerate(initial_solution_texts):
74
- log_entries.append(f" Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")
75
-
76
 
77
- valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
78
- if not valid_initial_solutions:
79
- error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
80
- return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
81
-
82
- # --- STAGE 2: CRITIQUE & EVALUATION ---
83
  progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
84
- log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
85
 
86
- evaluated_candidates_data = []
87
- llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}
88
-
89
- for i, sol_text in enumerate(initial_solution_texts): # Evaluate all, even errors, to show the error
90
- progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
91
- log_entries.append(f"\nEvaluating Candidate {i+1}:")
92
- if sol_text.startswith("ERROR"):
93
- eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
94
- log_entries.append(f" Skipping detailed evaluation for error: {sol_text}")
95
- else:
96
- eval_res = evaluate_solution_candidate(
97
- sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
98
- )
99
- log_entries.append(f" LLM Critique & Test Score: {eval_res.score}/10")
100
- log_entries.append(f" Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
101
- if eval_res.execution_error: log_entries.append(f" Execution Error: {eval_res.execution_error}")
102
- log_entries.append(f" Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
103
 
104
- evaluated_candidates_data.append({
105
  "id": i + 1,
106
- "solution_text": sol_text,
107
- "evaluation": eval_res
108
  })
109
 
110
- # Format display for initial solutions
111
- initial_solutions_display_md = []
112
- for data in evaluated_candidates_data:
113
- initial_solutions_display_md.append(
114
- f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
115
- f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
 
 
116
  )
117
 
118
- # --- STAGE 3: SELECTION ---
119
- progress(0.75, desc="Stage 3: Selecting Champion...")
120
- # Filter out candidates that were errors from genesis before sorting by score
121
- valid_evaluated_candidates = [cand for cand in evaluated_candidates_data if not cand['solution_text'].startswith("ERROR")]
122
- if not valid_evaluated_candidates:
123
- return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""
124
-
125
- valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
126
- best_candidate_data = valid_evaluated_candidates[0]
127
- log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")
128
-
129
- best_solution_display_md = (
130
- f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
131
- f"```python\n{best_candidate_data['solution_text']}\n```\n"
132
- f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
133
  )
134
 
135
- # --- STAGE 4: EVOLUTION ---
136
- progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
137
- log_entries.append("\n**Stage 4: Evolutionary Forge**")
138
- llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
139
 
140
- evolved_solution_text = evolve_solution(
141
- best_candidate_data["solution_text"],
142
- str(best_candidate_data["evaluation"].critique), # Pass the full critique including test results
143
- best_candidate_data["evaluation"].score,
144
- problem_description,
145
- problem_type,
146
- llm_evolve_config
147
  )
148
- log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")
149
 
150
- evolved_solution_display_md = ""
151
- final_thoughts_md = "" # For LLM explanation of unit test results if needed
152
 
153
- if evolved_solution_text.startswith("ERROR"):
154
- evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
155
  else:
156
- evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
157
- # Optionally, re-evaluate the evolved solution with unit tests if provided
158
- if "python" in problem_type.lower() and user_tests_string:
159
- progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
160
- log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
161
- # Using a neutral LLM config for this, or could be separate
162
- # This evaluation is primarily for the test results, not another LLM critique of the evolved code
163
- evolved_eval_res = evaluate_solution_candidate(
164
- evolved_solution_text, problem_description, problem_type, user_tests_string,
165
- {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens} # Low temp for focused test eval
166
  )
167
- evolved_solution_display_md += (
168
- f"\n\n**Post-Evolution Test Results (Simulated):**\n"
169
- f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
 
 
170
  )
171
- if evolved_eval_res.execution_error:
172
- evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
173
- log_entries.append(f" Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")
174
 
175
  # Get LLM to explain the test results of the evolved code
176
- # progress(0.95, desc="Explaining Evolved Code Test Results...")
177
- # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
178
- # explain_sys_prompt = get_system_prompt("code_execution_explainer")
179
- # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config) # Need a simplified dispatcher or use the full one
180
- # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"
181
-
182
 
183
- log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
184
  progress(1.0, desc="Cycle Complete!")
185
- return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
 
 
186
 
 
 
187
 
188
- # --- GRADIO UI (largely similar, but with a new input for user tests) ---
189
  intro_markdown = """
190
- # ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
191
- This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
192
- featuring basic (simulated) unit testing for Python code.
193
 
194
  **API Keys Required in Space Secrets:**
195
- - `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
196
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
 
197
  """
198
- token_status_md = ""
 
 
199
  if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
200
- token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
201
  else:
202
- if GEMINI_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
203
- else: token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
204
- if HF_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
205
- else: token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
206
 
207
 
208
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
209
  gr.Markdown(intro_markdown)
210
- gr.HTML(token_status_md)
211
 
212
- if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
213
- gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
214
  else:
215
  with gr.Row():
216
- with gr.Column(scale=2): # Made input column wider
 
217
  gr.Markdown("## 💡 1. Define the Challenge")
218
- problem_type_dd = gr.Dropdown(
219
- ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
220
- label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
 
221
  )
222
- problem_desc_tb = gr.Textbox(
223
- lines=4, label="Problem Description / Desired Outcome",
224
- placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
225
  )
226
- initial_hints_tb = gr.Textbox(
227
- lines=2, label="Initial Thoughts / Constraints (Optional)",
228
- placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
229
  )
230
- # NEW INPUT for User Tests
231
- user_tests_tb = gr.Textbox(
232
- lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
233
- placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
234
- info="For 'Python Algorithm with Tests' type. Ignored otherwise."
235
  )
236
 
237
  gr.Markdown("## ⚙️ 2. Configure The Forge")
238
- model_select_dd = gr.Dropdown(
239
- choices=list(AVAILABLE_MODELS.keys()),
240
- value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
241
- label="Select LLM Core Model"
 
242
  )
243
- num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs
244
 
245
- with gr.Accordion("Advanced LLM Parameters", open=False):
246
- # ... (temp and max_tokens sliders - same as before) ...
247
  with gr.Row():
248
- gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
249
- gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
250
  with gr.Row():
251
- eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
252
- eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
253
  with gr.Row():
254
- evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
255
- evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
256
-
257
 
258
- submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
259
 
260
- with gr.Column(scale=3): # Made output column wider
 
261
  gr.Markdown("## 🔥 3. The Forge's Output")
262
- with gr.Tabs():
263
- with gr.TabItem("📜 Genesis Candidates & Evaluations"):
264
- output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
265
- with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
266
- output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
267
- with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
268
- output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
269
- # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests") # Optional separate output
270
- with gr.TabItem("🛠️ Interaction Log (Dev View)"):
271
- output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
272
 
273
- outputs_list = [
274
- output_initial_solutions_md, output_best_solution_md,
275
- output_evolved_solution_md, output_interaction_log_md,
276
- gr.Markdown() # Placeholder for final_thoughts_md if you add it as a separate component
277
- ]
278
-
279
- submit_btn.click(
280
- fn=run_algoforge_simulation,
281
  inputs=[
282
- problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb, # Added user_tests_tb
283
- num_solutions_slider, model_select_dd,
284
- gen_temp_slider, gen_max_tokens_slider,
285
- eval_temp_slider, eval_max_tokens_slider,
286
- evolve_temp_slider, evolve_max_tokens_slider
287
  ],
288
- outputs=outputs_list
289
  )
 
290
  gr.Markdown("---")
291
  gr.Markdown(
292
- "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
293
- "Real sandboxing is complex and critical for safety."
294
  )
295
 
296
- # --- Entry Point ---
297
  if __name__ == "__main__":
298
  print("="*80)
299
- print("AlgoForge Prime™ (Modular Version) Starting...")
300
- # ... (startup print messages for API key status - same as before) ...
301
- print(f"UI default model key: {DEFAULT_MODEL_KEY}")
302
- print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
303
  print("="*80)
304
- demo.launch(debug=True, server_name="0.0.0.0")
 
 
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
4
+ import time # For progress updates
5
 
6
+ # --- Core Logic Imports ---
7
+ # Initialize clients first to ensure API keys are loaded before other modules use them.
8
+ from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
9
+ initialize_all_clients() # Call initialization once when the app starts
10
 
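For readers following the diff without the rest of the repository, here is a minimal sketch of the initialization pattern the two lines above appear to rely on. It assumes `core/llm_clients.py` simply derives the two flags from the `GOOGLE_API_KEY` and `HF_TOKEN` environment variables; the actual module may also construct real API client objects.

```python
# core/llm_clients.py -- illustrative sketch only, not the repo's actual module.
import os

# Evaluated at import time, so `from core.llm_clients import GEMINI_API_CONFIGURED, ...`
# in app.py already sees the final values before initialize_all_clients() runs.
GEMINI_API_CONFIGURED = bool(os.getenv("GOOGLE_API_KEY"))
HF_API_CONFIGURED = bool(os.getenv("HF_TOKEN"))

def initialize_all_clients() -> None:
    """One-time startup hook; a real implementation would build the Gemini /
    Hugging Face client objects here and log any configuration problems."""
    print(f"LLM clients initialized: Gemini={GEMINI_API_CONFIGURED}, HF={HF_API_CONFIGURED}")
```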
11
  from core.generation_engine import generate_initial_solutions
12
+ from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult # Class for typed results
13
  from core.evolution_engine import evolve_solution
14
+ from prompts.system_prompts import get_system_prompt # For specific roles like test explainer
15
+ from prompts.prompt_templates import format_code_test_analysis_user_prompt
16
 
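The orchestrator below reads several attributes off `EvaluationResult` (`score`, `critique_text`, `passed_tests`, `total_tests`, `execution_summary`). As a reading aid, a minimal sketch of the shape this file appears to assume; the real class in `core/evaluation_engine.py` may carry additional state.

```python
# Illustrative sketch of the evaluation result type, inferred from usage in app.py.
from dataclasses import dataclass

@dataclass
class EvaluationResult:
    score: int = 0               # combined critique + test score on a 0-10 scale
    critique_text: str = ""      # full LLM critique of the candidate solution
    passed_tests: int = 0        # user-provided asserts that passed
    total_tests: int = 0         # user-provided asserts that were executed
    execution_summary: str = ""  # human-readable summary of the (simulated) test run
```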
17
+ # --- Application Configuration (Models, Defaults) ---
18
+ AVAILABLE_MODELS_CONFIG = {}
19
+ UI_DEFAULT_MODEL_KEY = None
20
 
21
+ # Populate with Gemini models if API is configured
22
  if GEMINI_API_CONFIGURED:
23
+ AVAILABLE_MODELS_CONFIG.update({
24
  "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
25
  "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
26
  })
27
+ UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
28
 
29
+ # Populate with Hugging Face models if API is configured
30
  if HF_API_CONFIGURED:
31
+ AVAILABLE_MODELS_CONFIG.update({
32
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
33
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
34
+ "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"}, # Smaller CodeLlama
35
  })
36
+ if not UI_DEFAULT_MODEL_KEY: # If Gemini isn't configured, default to an HF model
37
+ UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
38
+
39
+ # Absolute fallback if no models could be configured
40
+ if not AVAILABLE_MODELS_CONFIG:
41
+ print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
42
+ AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
43
+ UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
44
+ elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
45
+ UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0] # Pick first available if default somehow not set
46
+
47
+ # --- Main Orchestration Logic for Gradio ---
48
+ def run_algoforge_simulation_orchestrator(
49
+ problem_type_selected: str,
50
+ problem_description_text: str,
51
+ initial_hints_text: str,
52
+ user_provided_tests_code: str,
53
+ num_initial_solutions_to_gen: int,
54
+ selected_model_ui_key: str,
55
+ genesis_temp: float, genesis_max_tokens: int,
56
+ critique_temp: float, critique_max_tokens: int,
57
+ evolution_temp: float, evolution_max_tokens: int,
58
+ progress=gr.Progress(track_tqdm=True) # Gradio progress tracker
59
  ):
60
  progress(0, desc="Initializing AlgoForge Prime™...")
61
+ log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
62
+ start_time = time.time()
63
+
64
+ # Basic input validation
65
+ if not problem_description_text.strip():
66
+ error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
67
+ log_entries.append(error_msg)
68
+ return error_msg, "", "", "\n".join(log_entries), "" # Return 5 values for outputs
69
+
70
+ current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
71
+ if not current_model_config or current_model_config["type"] == "none":
72
+ error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
73
+ log_entries.append(error_msg)
74
+ return error_msg, "", "", "\n".join(log_entries), ""
75
+
76
+ log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
77
+ log_entries.append(f"Problem Type: {problem_type_selected}")
78
+ log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")
79
+
80
+ # Prepare LLM configurations for each stage
81
+ llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
82
+ llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
83
+ llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
84
 
85
  # --- STAGE 1: GENESIS ---
86
+ progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
87
+ log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
88
 
89
+ initial_raw_solutions = generate_initial_solutions(
90
+ problem_description_text, initial_hints_text, problem_type_selected,
91
+ num_initial_solutions_to_gen, llm_config_genesis
 
92
  )
93
+ log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
94
+ for i, sol_text in enumerate(initial_raw_solutions):
95
+ log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")
 
96
 
97
+ # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
98
  progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
99
+ log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
100
 
101
+ evaluated_candidates_list = [] # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
102
+
103
+ for i, candidate_solution_text in enumerate(initial_raw_solutions):
104
+ current_progress = 0.3 + ( (i + 1) / num_initial_solutions_to_gen ) * 0.35 # Progress for evaluation stage
105
+ progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
106
+ log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
107
 
108
+ # The evaluation_engine handles if candidate_solution_text itself is an error string
109
+ evaluation_obj = evaluate_solution_candidate( # type: EvaluationResult
110
+ candidate_solution_text, problem_description_text, problem_type_selected,
111
+ user_provided_tests_code, llm_config_critique
112
+ )
113
+
114
+ log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
115
+ log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
116
+ if evaluation_obj.execution_summary: log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
117
+ log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
118
+
119
+ evaluated_candidates_list.append({
120
  "id": i + 1,
121
+ "solution_text": candidate_solution_text, # Store original text, even if it was an error from genesis
122
+ "evaluation_result": evaluation_obj
123
  })
124
 
125
+ # Format display for initial solutions & evaluations
126
+ initial_solutions_display_markdown = []
127
+ for data in evaluated_candidates_list:
128
+ initial_solutions_display_markdown.append(
129
+ f"**Candidate {data['id']}:**\n"
130
+ f"```python\n{data['solution_text']}\n```\n\n" # Assuming python for display, adjust if problem_type varies widely
131
+ f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
132
+ f"{data['evaluation_result'].critique_text}\n---"
133
  )
134
 
135
+ # --- STAGE 3: SELECTION OF CHAMPION ---
136
+ progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
137
+ log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
138
+
139
+ # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., score of 0 from evaluation)
140
+ # We want to select a champion that is actually a piece of code/algorithm, not an error message.
141
+ potentially_viable_candidates = [
142
+ cand for cand in evaluated_candidates_list
143
+ if cand["evaluation_result"] and cand["evaluation_result"].score > 0 and \
144
+ cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
145
+ ]
146
+
147
+ if not potentially_viable_candidates:
148
+ final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
149
+ log_entries.append(f" CRITICAL: {final_error_msg}")
150
+ return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""
151
+
152
+ potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
153
+ champion_candidate_data = potentially_viable_candidates[0]
154
+
155
+ log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
156
+ f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
157
+ f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
158
+
159
+ champion_display_markdown = (
160
+ f"**Champion Candidate ID: {champion_candidate_data['id']} "
161
+ f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
162
+ f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
163
+ f"**Original Comprehensive Evaluation for this Champion:**\n"
164
+ f"{champion_candidate_data['evaluation_result'].critique_text}"
165
  )
166
 
167
+ # --- STAGE 4: EVOLUTIONARY FORGE ---
168
+ progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
169
+ log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
 
170
 
171
+ evolved_solution_code = evolve_solution(
172
+ champion_candidate_data["solution_text"],
173
+ champion_candidate_data["evaluation_result"].critique_text, # Pass the full critique
174
+ champion_candidate_data["evaluation_result"].score,
175
+ problem_description_text,
176
+ problem_type_selected,
177
+ llm_config_evolution
178
  )
179
+ log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")
180
 
181
+ evolved_solution_display_markdown = ""
182
+ ai_test_analysis_markdown = "" # For LLM explanation of unit test results of evolved code
183
 
184
+ if evolved_solution_code.startswith("ERROR"):
185
+ evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
186
  else:
187
+ evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
188
+
189
+ # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
190
+ if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
191
+ progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
192
+ log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
193
+
194
+ # Use a low temperature for this critique to focus on test results rather than creative critique
195
+ # The critique part here is mostly for consistency, primary goal is test execution.
196
+ evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
197
+
198
+ evolved_code_eval_result = evaluate_solution_candidate( # type: EvaluationResult
199
+ evolved_solution_code, problem_description_text, problem_type_selected,
200
+ user_provided_tests_code, evolved_critique_config
201
  )
202
+
203
+ evolved_solution_display_markdown += (
204
+ f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
205
+ f"{evolved_code_eval_result.execution_summary}\n"
206
+ f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
207
  )
208
+ log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
209
+ f"Summary: {evolved_code_eval_result.execution_summary}")
 
210
 
211
  # Get LLM to explain the test results of the evolved code
212
+ if evolved_code_eval_result.total_tests > 0: # Only if tests were run
213
+ progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
214
+ log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
215
+ analysis_user_prompt = format_code_test_analysis_user_prompt(
216
+ evolved_solution_code,
217
+ user_provided_tests_code,
218
+ evolved_code_eval_result.execution_summary # Pass the summary string
219
+ )
220
+ analysis_system_prompt = get_system_prompt("code_execution_explainer")
221
+
222
+ # Use a config for analysis - can be same as critique or specialized
223
+ llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
224
+ "temp": 0.3, "max_tokens": critique_max_tokens + 100} # A bit more tokens for explanation
225
 
226
+ from core.llm_clients import call_huggingface_api, call_gemini_api # Re-import for clarity or use a dispatcher
227
+
228
+ explanation_response_obj = None
229
+ if llm_analysis_config["type"] == "hf":
230
+ explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
231
+ elif llm_analysis_config["type"] == "google_gemini":
232
+ explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
233
+
234
+ if explanation_response_obj and explanation_response_obj.success:
235
+ ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
236
+ log_entries.append(f" AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
237
+ elif explanation_response_obj:
238
+ ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
239
+ log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")
240
+
241
+
242
+ total_time = time.time() - start_time
243
+ log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
244
  progress(1.0, desc="Cycle Complete!")
245
+
246
+ return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
247
+
248
 
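The in-function re-import above ("Re-import for clarity or use a dispatcher") hints at an obvious refactor. A possible shape for such a helper, assuming the `call_gemini_api` / `call_huggingface_api` call signature used in this file (prompt, model id, temperature, max tokens, system prompt); this is a sketch, not part of the committed code:

```python
# Hypothetical dispatcher, not in this commit: routes a request to the backend
# named in one of the llm_config dicts built by the orchestrator above.
from core.llm_clients import call_gemini_api, call_huggingface_api

def dispatch_llm_call(user_prompt: str, system_prompt: str, llm_config: dict):
    """Return the provider's response object, or None for an unknown backend type."""
    if llm_config["type"] == "google_gemini":
        return call_gemini_api(user_prompt, llm_config["model_id"],
                               llm_config["temp"], llm_config["max_tokens"], system_prompt)
    if llm_config["type"] == "hf":
        return call_huggingface_api(user_prompt, llm_config["model_id"],
                                    llm_config["temp"], llm_config["max_tokens"], system_prompt)
    return None
```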
249
+ # --- Gradio UI Definition ---
250
+ # (This section is largely similar to the previous app.py, with updated input/output connections)
251
 
 
252
  intro_markdown = """
253
+ # ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
254
+ This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
255
+ featuring (simulated) unit testing for Python code if provided.
256
 
257
  **API Keys Required in Space Secrets:**
258
+ - `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
259
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
260
+ If keys are missing or invalid, corresponding models will be unavailable.
261
  """
262
+
263
+ # Determine API status for UI message
264
+ ui_token_status_md = ""
265
  if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
266
+ ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
267
  else:
268
+ if GEMINI_API_CONFIGURED:
269
+ ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
270
+ else:
271
+ ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
272
+
273
+ if HF_API_CONFIGURED:
274
+ ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
275
+ else:
276
+ ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"
277
 
278
 
279
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
280
  gr.Markdown(intro_markdown)
281
+ gr.HTML(ui_token_status_md)
282
 
283
+ if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
284
+ gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
285
  else:
286
  with gr.Row():
287
+ # Input Column
288
+ with gr.Column(scale=2): # Input column slightly wider
289
  gr.Markdown("## 💡 1. Define the Challenge")
290
+ problem_type_dropdown = gr.Dropdown(
291
+ choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
292
+ label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
293
+ info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
294
  )
295
+ problem_description_textbox = gr.Textbox(
296
+ lines=5, label="Problem Description / Desired Outcome",
297
+ placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
298
  )
299
+ initial_hints_textbox = gr.Textbox(
300
+ lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
301
+ placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
302
  )
303
+ user_tests_textbox = gr.Textbox(
304
+ lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
305
+ placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
306
+ info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
 
307
  )
308
 
309
  gr.Markdown("## ⚙️ 2. Configure The Forge")
310
+ model_selection_dropdown = gr.Dropdown(
311
+ choices=list(AVAILABLE_MODELS_CONFIG.keys()),
312
+ value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
313
+ label="Select LLM Core Model",
314
+ info="Ensure the corresponding API key (Google or HF) is configured in secrets."
315
  )
316
+ num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")
317
 
318
+ with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
 
319
  with gr.Row():
320
+ genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.") # Gemini range often 0-1
321
+ genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
322
  with gr.Row():
323
+ critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
324
+ critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
325
  with gr.Row():
326
+ evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
327
+ evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")
 
328
 
329
+ engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")
330
 
331
+ # Output Column
332
+ with gr.Column(scale=3): # Output column wider
333
  gr.Markdown("## 🔥 3. The Forge's Output")
334
+ with gr.Tabs(elem_id="output_tabs_elem"):
335
+ with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
336
+ output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
337
+ with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
338
+ output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
339
+ with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
340
+ output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
341
+ output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
342
+ with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
343
+ output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
344
 
345
+ # Connect button to the orchestration function
346
+ engage_button.click(
347
+ fn=run_algoforge_simulation_orchestrator,
348
  inputs=[
349
+ problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
350
+ num_initial_solutions_slider, model_selection_dropdown,
351
+ genesis_temp_slider, genesis_max_tokens_slider,
352
+ critique_temp_slider, critique_max_tokens_slider,
353
+ evolution_temp_slider, evolution_max_tokens_slider
354
  ],
355
+ outputs=[
356
+ output_initial_solutions_markdown, output_champion_markdown,
357
+ output_evolved_markdown, output_interaction_log_markdown,
358
+ output_ai_test_analysis_markdown
359
+ ]
360
  )
361
+
362
  gr.Markdown("---")
363
  gr.Markdown(
364
+ "**Disclaimer:** This is a conceptual, educational demonstration. "
365
+ "The (simulated) unit testing feature is for illustrative purposes. "
366
+ "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
367
+ "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
368
+ "LLM outputs always require careful human review and verification."
369
  )
370
 
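To make the warning above concrete, here is a deliberately simple sketch of how user-provided `assert` lines could be executed in a separate process with a timeout. This is not the project's `evaluation_engine`, and a subprocess plus a timeout is still not real sandboxing (no filesystem, network, or memory isolation); treat it as an illustration of the minimum precautions, not a safe design.

```python
# Illustration only -- NOT safe sandboxing. Real isolation needs containers,
# seccomp/gVisor-style confinement, or a dedicated execution service.
import subprocess
import sys
import tempfile

def run_user_asserts(candidate_code: str, user_tests_code: str, timeout_s: int = 5) -> str:
    """Run candidate code plus the user's assert lines in a child Python process."""
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(candidate_code + "\n\n" + user_tests_code + "\nprint('ALL ASSERTS PASSED')\n")
        script_path = f.name
    try:
        proc = subprocess.run(
            [sys.executable, script_path],
            capture_output=True, text=True, timeout=timeout_s,
        )
        return (proc.stdout + proc.stderr).strip()
    except subprocess.TimeoutExpired:
        return f"TIMEOUT: execution exceeded {timeout_s} seconds"
```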
371
+ # --- Entry Point for Running the Gradio App ---
372
  if __name__ == "__main__":
373
  print("="*80)
374
+ print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
375
+
376
+ print(f" Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
377
+ print(f" Hugging Face API Configured: {HF_API_CONFIGURED}")
378
+
379
+ if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
380
+ print(" CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
381
+
382
+ print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
383
+ print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
384
  print("="*80)
385
+
386
+ app_demo.launch(debug=True, server_name="0.0.0.0") # server_name="0.0.0.0" is often needed for Docker/Spaces