# algoforge_prime/app.py
import gradio as gr
import os

# Initialize core components first (important for loading API keys etc.).
# This needs to happen before other core modules try to use the status.
from core.llm_clients import initialize_clients
initialize_clients()  # Explicitly initialize
# Import the status flags only after initialization, so the names bound here
# reflect the result of initialize_clients() rather than import-time defaults.
from core.llm_clients import GEMINI_API_CONFIGURED, HF_API_CONFIGURED

from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
# from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
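# NOTE: Assumed interfaces for the core helpers above, inferred purely from how
# they are called in this file (the real definitions live in core/ and may differ):
#   generate_initial_solutions(problem_description, initial_hints, problem_type,
#                              num_initial_solutions, llm_config) -> list[str]
#   evaluate_solution_candidate(solution_text, problem_description, problem_type,
#                               user_tests_string, llm_config) -> EvaluationResult
#   evolve_solution(solution_text, critique_text, score, problem_description,
#                   problem_type, llm_config) -> str
#   EvaluationResult exposes at least: score, critique, passed_tests, total_tests,
#   execution_error.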

# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None

if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY: 
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

if not AVAILABLE_MODELS:
    AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
    DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY: 
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]


# --- Main Orchestration Logic ---
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string, # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True) # Gradio progress bar
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]

    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""

    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
    
    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")


    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""

    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}

    for i, sol_text in enumerate(initial_solution_texts): # Evaluate all, even errors, to show the error
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if not sol_text or sol_text.startswith("ERROR"):
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error: log_entries.append(f"  Execution Error: {eval_res.execution_error}")
            log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })

    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )
    
    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from genesis before sorting by score
    valid_evaluated_candidates = [
        cand for cand in evaluated_candidates_data
        if cand['solution_text'] and not cand['solution_text'].startswith("ERROR")
    ]
    if not valid_evaluated_candidates:
         return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""

    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")

    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )

    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique), # Pass the full critique including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")

    evolved_solution_display_md = ""
    final_thoughts_md = "" # For LLM explanation of unit test results if needed

    if not evolved_solution_text or evolved_solution_text.startswith("ERROR"):
        evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
    else:
        evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided
        if "python" in problem_type.lower() and user_tests_string:
            progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
            log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
            # Using a neutral LLM config for this, or could be separate
            # This evaluation is primarily for the test results, not another LLM critique of the evolved code
            evolved_eval_res = evaluate_solution_candidate(
                 evolved_solution_text, problem_description, problem_type, user_tests_string,
                 {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens} # Low temp for focused test eval
            )
            evolved_solution_display_md += (
                f"\n\n**Post-Evolution Test Results (Simulated):**\n"
                f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
            )
            if evolved_eval_res.execution_error:
                 evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
            log_entries.append(f"  Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")

            # Get LLM to explain the test results of the evolved code
            # progress(0.95, desc="Explaining Evolved Code Test Results...")
            # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
            # explain_sys_prompt = get_system_prompt("code_execution_explainer")
            # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config) # Need a simplified dispatcher or use the full one
            # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"


    log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
    progress(1.0, desc="Cycle Complete!")
    return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
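
# ---------------------------------------------------------------------------
# The commented-out "explain test results" step inside run_algoforge_simulation
# needs a simplified LLM dispatcher. The helper below is only a minimal sketch of
# what such a dispatcher could look like, assuming the llm_config dict shape used
# throughout this file ({"type", "model_id", "temp", "max_tokens"}) and that
# initialize_clients() has already configured the google-generativeai SDK.
# It is NOT part of core.llm_clients and stays unused until that step is enabled.
# ---------------------------------------------------------------------------
def dispatch_llm_call_simplified(prompt: str, system_prompt: str, llm_config: dict) -> str:
    try:
        if llm_config["type"] == "google_gemini":
            import google.generativeai as genai  # assumed configured by initialize_clients()
            model = genai.GenerativeModel(llm_config["model_id"])
            response = model.generate_content(
                f"{system_prompt}\n\n{prompt}",  # prepend system prompt for broad model support
                generation_config={"temperature": llm_config["temp"],
                                   "max_output_tokens": llm_config["max_tokens"]},
            )
            return response.text
        elif llm_config["type"] == "hf":
            from huggingface_hub import InferenceClient
            client = InferenceClient(model=llm_config["model_id"], token=os.getenv("HF_TOKEN"))
            return client.text_generation(
                f"{system_prompt}\n\n{prompt}",
                temperature=max(llm_config["temp"], 0.01),  # HF endpoint rejects temperature == 0
                max_new_tokens=llm_config["max_tokens"],
            )
        return f"ERROR: Unknown model type '{llm_config['type']}' for simplified dispatch."
    except Exception as e:  # keep the UI responsive even if this auxiliary call fails
        return f"ERROR: Simplified LLM dispatch failed: {e}"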


# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery, 
featuring basic (simulated) unit testing for Python code.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else: token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else: token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"


with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)

    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
         gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2): # Made input column wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for User Tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
                
                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_select_dd = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
                    label="Select LLM Core Model"
                )
                num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs

                with gr.Accordion("Advanced LLM Parameters", open=False):
                    # ... (temp and max_tokens sliders - same as before) ...
                    with gr.Row():
                        gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
                        gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
                    with gr.Row():
                        eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
                        eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
                    with gr.Row():
                        evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")


                submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")

            with gr.Column(scale=3): # Made output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests") # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
        
        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown() # Placeholder for final_thoughts_md if you add it as a separate component
        ]

        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb, # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )
    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Real sandboxing is complex and critical for safety."
    )

# --- Entry Point ---
if __name__ == "__main__":
    print("="*80)
    print("AlgoForge Prime™ (Modular Version) Starting...")
    print(f"Google Gemini API configured: {GEMINI_API_CONFIGURED}")
    print(f"Hugging Face API token configured: {HF_API_CONFIGURED}")
    print(f"UI default model key: {DEFAULT_MODEL_KEY}")
    print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
    print("="*80)
    demo.launch(debug=True, server_name="0.0.0.0")
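
# To try this outside of a Hugging Face Space (a sketch; assumes the core/ and
# prompts/ packages from this repo are on the path and that the likely
# dependencies -- gradio, google-generativeai, huggingface_hub -- are installed):
#   export GOOGLE_API_KEY=...    # and/or HF_TOKEN=...
#   python app.py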