# algoforge_prime/app.py
import gradio as gr
import os
# Initialize core components first (important for loading API keys etc.)
# This needs to happen before other core modules try to use the status
from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
initialize_clients() # Explicitly initialize
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
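# Interfaces of the core helpers, as inferred from their usage later in this file (not a formal spec):
#   generate_initial_solutions(problem_description, initial_hints, problem_type, n, llm_config) -> list[str]
#   evaluate_solution_candidate(solution_text, problem_description, problem_type, user_tests, llm_config) -> EvaluationResult
#   evolve_solution(solution_text, critique, score, problem_description, problem_type, llm_config) -> str
#   EvaluationResult is expected to expose: score, critique, passed_tests, total_tests, execution_error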
# from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY:
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
if not AVAILABLE_MODELS:
AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY:
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
# --- Main Orchestration Logic ---
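# The orchestration below runs four stages in sequence:
#   1. Genesis   - generate several candidate solutions
#   2. Critique  - LLM critique plus (simulated) unit-test evaluation of each candidate
#   3. Selection - pick the highest-scoring valid candidate as champion
#   4. Evolution - ask the LLM to refine the champion, then optionally re-run the tests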
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string,  # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress bar
):
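    """Run one full AlgoForge Prime™ cycle (Genesis -> Critique -> Selection -> Evolution).

    `user_tests_string` holds optional `assert` lines used for the simulated unit-test evaluation.
    Returns a 5-tuple of markdown strings for the UI: (initial candidates & evaluations,
    champion candidate, evolved artifact, interaction log, optional final analysis).
    """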
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]

    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""

    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""

    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")

    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}
    for i, sol_text in enumerate(initial_solution_texts):  # Evaluate all candidates, including Genesis errors, so the error still shows up in the report
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if sol_text.startswith("ERROR"):
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error:
                log_entries.append(f"  Execution Error: {eval_res.execution_error}")
            log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })
    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )
    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from Genesis before sorting by score
    valid_evaluated_candidates = [cand for cand in evaluated_candidates_data if not cand['solution_text'].startswith("ERROR")]
    if not valid_evaluated_candidates:
        return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""

    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")

    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )
    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique),  # Pass the full critique, including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")
evolved_solution_display_md = ""
final_thoughts_md = "" # For LLM explanation of unit test results if needed
if evolved_solution_text.startswith("ERROR"):
evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
else:
evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
# Optionally, re-evaluate the evolved solution with unit tests if provided
if "python" in problem_type.lower() and user_tests_string:
progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
# Using a neutral LLM config for this, or could be separate
# This evaluation is primarily for the test results, not another LLM critique of the evolved code
evolved_eval_res = evaluate_solution_candidate(
evolved_solution_text, problem_description, problem_type, user_tests_string,
{"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens} # Low temp for focused test eval
)
evolved_solution_display_md += (
f"\n\n**Post-Evolution Test Results (Simulated):**\n"
f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
)
if evolved_eval_res.execution_error:
evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
log_entries.append(f" Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")
# Get LLM to explain the test results of the evolved code
# progress(0.95, desc="Explaining Evolved Code Test Results...")
# explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
# explain_sys_prompt = get_system_prompt("code_execution_explainer")
# explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config) # Need a simplified dispatcher or use the full one
# final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"
log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
progress(1.0, desc="Cycle Complete!")
return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
featuring basic (simulated) unit testing for Python code.
**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)

    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
        gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2):  # Wider input column
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for user-supplied unit tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
gr.Markdown("## ⚙️ 2. Configure The Forge")
model_select_dd = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
label="Select LLM Core Model"
)
num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs
with gr.Accordion("Advanced LLM Parameters", open=False):
# ... (temp and max_tokens sliders - same as before) ...
with gr.Row():
gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
with gr.Row():
eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
with gr.Row():
evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
            with gr.Column(scale=3):  # Wider output column
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests")  # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown()  # Placeholder for final_thoughts_md if you add it as a separate component
        ]
        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb,  # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )
gr.Markdown("---")
gr.Markdown(
"**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
"Real sandboxing is complex and critical for safety."
)
# --- Entry Point ---
if __name__ == "__main__":
print("="*80)
print("AlgoForge Prime™ (Modular Version) Starting...")
# ... (startup print messages for API key status - same as before) ...
print(f"UI default model key: {DEFAULT_MODEL_KEY}")
print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
print("="*80)
demo.launch(debug=True, server_name="0.0.0.0")