# algoforge_prime/app.py
import gradio as gr
import os

# Initialize core components first (important for loading API keys etc.).
# This needs to happen before other core modules try to use the status.
from core.llm_clients import initialize_clients
initialize_clients()  # Explicitly initialize
# Import the status flags only after initialization, so the names bound here
# reflect the result of initialize_clients() rather than import-time defaults.
from core.llm_clients import GEMINI_API_CONFIGURED, HF_API_CONFIGURED

from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
# from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
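# NOTE: Assumed interfaces for the core helpers above, inferred purely from how
# they are called in this file (the real definitions live in core/ and may differ):
#   generate_initial_solutions(problem_description, initial_hints, problem_type,
#                              num_initial_solutions, llm_config) -> list[str]
#   evaluate_solution_candidate(solution_text, problem_description, problem_type,
#                               user_tests_string, llm_config) -> EvaluationResult
#   evolve_solution(solution_text, critique_text, score, problem_description,
#                   problem_type, llm_config) -> str
#   EvaluationResult exposes at least: score, critique, passed_tests, total_tests,
#   execution_error.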

# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None

if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY: 
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

if not AVAILABLE_MODELS:
    AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
    DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY: 
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]


# --- Main Orchestration Logic ---
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string, # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True) # Gradio progress bar
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]

    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""

    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
    
    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")


    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""

    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}

    for i, sol_text in enumerate(initial_solution_texts): # Evaluate all, even errors, to show the error
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if not sol_text or sol_text.startswith("ERROR"):
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error: log_entries.append(f"  Execution Error: {eval_res.execution_error}")
            log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })

    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )
    
    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from genesis before sorting by score
    valid_evaluated_candidates = [
        cand for cand in evaluated_candidates_data
        if cand['solution_text'] and not cand['solution_text'].startswith("ERROR")
    ]
    if not valid_evaluated_candidates:
         return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""

    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")

    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )

    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique), # Pass the full critique including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")

    evolved_solution_display_md = ""
    final_thoughts_md = "" # For LLM explanation of unit test results if needed

    if not evolved_solution_text or evolved_solution_text.startswith("ERROR"):
        evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
    else:
        evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided
        if "python" in problem_type.lower() and user_tests_string:
            progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
            log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
            # Using a neutral LLM config for this, or could be separate
            # This evaluation is primarily for the test results, not another LLM critique of the evolved code
            evolved_eval_res = evaluate_solution_candidate(
                 evolved_solution_text, problem_description, problem_type, user_tests_string,
                 {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens} # Low temp for focused test eval
            )
            evolved_solution_display_md += (
                f"\n\n**Post-Evolution Test Results (Simulated):**\n"
                f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
            )
            if evolved_eval_res.execution_error:
                 evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
            log_entries.append(f"  Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")

            # Get LLM to explain the test results of the evolved code
            # progress(0.95, desc="Explaining Evolved Code Test Results...")
            # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
            # explain_sys_prompt = get_system_prompt("code_execution_explainer")
            # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config) # Need a simplified dispatcher or use the full one
            # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"


    log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
    progress(1.0, desc="Cycle Complete!")
    return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
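
# ---------------------------------------------------------------------------
# The commented-out "explain test results" step inside run_algoforge_simulation
# needs a simplified LLM dispatcher. The helper below is only a minimal sketch of
# what such a dispatcher could look like, assuming the llm_config dict shape used
# throughout this file ({"type", "model_id", "temp", "max_tokens"}) and that
# initialize_clients() has already configured the google-generativeai SDK.
# It is NOT part of core.llm_clients and stays unused until that step is enabled.
# ---------------------------------------------------------------------------
def dispatch_llm_call_simplified(prompt: str, system_prompt: str, llm_config: dict) -> str:
    try:
        if llm_config["type"] == "google_gemini":
            import google.generativeai as genai  # assumed configured by initialize_clients()
            model = genai.GenerativeModel(llm_config["model_id"])
            response = model.generate_content(
                f"{system_prompt}\n\n{prompt}",  # prepend system prompt for broad model support
                generation_config={"temperature": llm_config["temp"],
                                   "max_output_tokens": llm_config["max_tokens"]},
            )
            return response.text
        elif llm_config["type"] == "hf":
            from huggingface_hub import InferenceClient
            client = InferenceClient(model=llm_config["model_id"], token=os.getenv("HF_TOKEN"))
            return client.text_generation(
                f"{system_prompt}\n\n{prompt}",
                temperature=max(llm_config["temp"], 0.01),  # HF endpoint rejects temperature == 0
                max_new_tokens=llm_config["max_tokens"],
            )
        return f"ERROR: Unknown model type '{llm_config['type']}' for simplified dispatch."
    except Exception as e:  # keep the UI responsive even if this auxiliary call fails
        return f"ERROR: Simplified LLM dispatch failed: {e}"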


# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery, 
featuring basic (simulated) unit testing for Python code.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else: token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else: token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"


with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)

    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
         gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2): # Made input column wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for User Tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
                
                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_select_dd = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
                    label="Select LLM Core Model"
                )
                num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs

                with gr.Accordion("Advanced LLM Parameters", open=False):
                    # ... (temp and max_tokens sliders - same as before) ...
                    with gr.Row():
                        gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
                        gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
                    with gr.Row():
                        eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
                        eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
                    with gr.Row():
                        evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")


                submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")

            with gr.Column(scale=3): # Made output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests") # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
        
        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown() # Placeholder for final_thoughts_md if you add it as a separate component
        ]

        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb, # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )
    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Real sandboxing is complex and critical for safety."
    )

# --- Entry Point ---
if __name__ == "__main__":
    print("="*80)
    print("AlgoForge Prime™ (Modular Version) Starting...")
    print(f"Google Gemini API configured: {GEMINI_API_CONFIGURED}")
    print(f"Hugging Face API token configured: {HF_API_CONFIGURED}")
    print(f"UI default model key: {DEFAULT_MODEL_KEY}")
    print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
    print("="*80)
    demo.launch(debug=True, server_name="0.0.0.0")
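
# To try this outside of a Hugging Face Space (a sketch; assumes the core/ and
# prompts/ packages from this repo are on the path and that the likely
# dependencies -- gradio, google-generativeai, huggingface_hub -- are installed):
#   export GOOGLE_API_KEY=...    # and/or HF_TOKEN=...
#   python app.py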