# algoforge_prime/app.py
import gradio as gr
import os
# Initialize core components first (important for loading API keys etc.)
# This needs to happen before other core modules try to use the status.
from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
initialize_clients()  # Explicitly initialize
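# Assumed contract (not verified here): initialize_clients() reads GOOGLE_API_KEY and
# HF_TOKEN from the environment / Space secrets and sets the module-level booleans
# GEMINI_API_CONFIGURED / HF_API_CONFIGURED, which are checked below when building
# AVAILABLE_MODELS and the status banner.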
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
# from prompts.system_prompts import get_system_prompt  # Might not be needed directly here if core modules handle it
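# Assumed EvaluationResult interface (defined in core.evaluation_engine): the orchestration
# below reads .score, .critique, .passed_tests, .total_tests and .execution_error, and for
# Genesis errors constructs it with only score/critique, which assumes the remaining fields
# have sensible defaults (e.g. 0/0 tests, no execution error).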
# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY:
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
if not AVAILABLE_MODELS:
    AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
    DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY:
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
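# Illustrative helper (hypothetical name, not wired into the pipeline): the UI below asks for
# one `assert` statement per line in the unit-test box, and core.evaluation_engine is assumed
# to do the real parsing/execution. This sketch only shows how that convention could be
# counted, e.g. for logging.
def _count_user_asserts(user_tests_string: str) -> int:
    return sum(
        1
        for line in (user_tests_string or "").splitlines()
        if line.strip().startswith("assert")
    )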
# --- Main Orchestration Logic ---
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string,  # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress bar
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]
    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""
    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")
    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}
    for i, sol_text in enumerate(initial_solution_texts):  # Evaluate all, even errors, to show the error
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if sol_text.startswith("ERROR"):
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error:
                log_entries.append(f"  Execution Error: {eval_res.execution_error}")
        log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })
    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )
    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from genesis before sorting by score
    valid_evaluated_candidates = [cand for cand in evaluated_candidates_data if not cand["solution_text"].startswith("ERROR")]
    if not valid_evaluated_candidates:
        return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""
    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")
    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )
    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique),  # Pass the full critique including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")
    evolved_solution_display_md = ""
    final_thoughts_md = ""  # For LLM explanation of unit test results if needed
    if evolved_solution_text.startswith("ERROR"):
        evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
    else:
        evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided
        if "python" in problem_type.lower() and user_tests_string:
            progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
            log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
            # Using a neutral LLM config for this, or it could be separate.
            # This evaluation is primarily for the test results, not another LLM critique of the evolved code.
            evolved_eval_res = evaluate_solution_candidate(
                evolved_solution_text, problem_description, problem_type, user_tests_string,
                {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens}  # Low temp for focused test eval
            )
            evolved_solution_display_md += (
                f"\n\n**Post-Evolution Test Results (Simulated):**\n"
                f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
            )
            if evolved_eval_res.execution_error:
                evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
            log_entries.append(f"  Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")
            # Get LLM to explain the test results of the evolved code
            # progress(0.95, desc="Explaining Evolved Code Test Results...")
            # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
            # explain_sys_prompt = get_system_prompt("code_execution_explainer")
            # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config)  # Need a simplified dispatcher or use the full one
            # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"
    log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
    progress(1.0, desc="Cycle Complete!")
    return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
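# --- Illustrative sketch (not used by this app) ---
# The (simulated) unit testing above is delegated to core.evaluation_engine. As a rough,
# assumption-laden sketch of what actually executing a candidate plus the user's asserts
# could look like, the helper below (hypothetical name `_run_asserts_in_subprocess`) writes
# both to a temp file and runs it with a timeout. NOTE: a subprocess with a timeout is NOT
# a real sandbox; see the disclaimer in the UI below.
import subprocess
import sys
import tempfile

def _run_asserts_in_subprocess(candidate_code: str, user_tests_string: str, timeout_s: int = 5):
    """Returns (all_asserts_passed: bool, combined_output: str)."""
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(candidate_code + "\n\n" + user_tests_string + "\n")
        path = f.name
    try:
        proc = subprocess.run(
            [sys.executable, path], capture_output=True, text=True, timeout=timeout_s
        )
        return proc.returncode == 0, (proc.stdout + proc.stderr)
    except subprocess.TimeoutExpired:
        return False, f"Timed out after {timeout_s}s"
    finally:
        os.remove(path)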
# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
featuring basic (simulated) unit testing for Python code.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)
    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
        gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2):  # Input column made wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for user tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_select_dd = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
                    label="Select LLM Core Model"
                )
                num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")  # Max 3 for faster runs
                with gr.Accordion("Advanced LLM Parameters", open=False):
                    # ... (temp and max_tokens sliders - same as before) ...
                    with gr.Row():
                        gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
                        gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
                    with gr.Row():
                        eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
                        eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
                    with gr.Row():
                        evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
                submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
            with gr.Column(scale=3):  # Output column made wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests")  # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown()  # Placeholder for final_thoughts_md if you add it as a separate component
        ]
        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb,  # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )
    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Real sandboxing is complex and critical for safety."
    )
# --- Entry Point ---
if __name__ == "__main__":
    print("=" * 80)
    print("AlgoForge Prime™ (Modular Version) Starting...")
    # ... (startup print messages for API key status - same as before) ...
    print(f"UI default model key: {DEFAULT_MODEL_KEY}")
    print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
    print("=" * 80)
    demo.launch(debug=True, server_name="0.0.0.0")