# algoforge_prime/app.py
import gradio as gr
import os
import time # For progress updates
from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
from prompts.system_prompts import get_system_prompt
from prompts.prompt_templates import format_code_test_analysis_user_prompt
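
# NOTE: assumed wiring — initialize_all_clients is imported above but never invoked in this
# file; a no-argument call here (signature assumed) is a sketch of the intended setup step
# before the GEMINI_API_CONFIGURED / HF_API_CONFIGURED flags are consulted below.
initialize_all_clients()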
# --- Application Configuration (Models, Defaults) ---
AVAILABLE_MODELS_CONFIG = {}
UI_DEFAULT_MODEL_KEY = None
# Populate with Gemini models if API is configured
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

# Populate with Hugging Face models if API is configured
if HF_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
        "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"},  # Smaller CodeLlama
    })
    if not UI_DEFAULT_MODEL_KEY:  # If Gemini isn't configured, default to an HF model
        UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

# Absolute fallback if no models could be configured
if not AVAILABLE_MODELS_CONFIG:
    print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
    AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
    UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
    UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]  # Pick first available if default somehow not set
# --- Main Orchestration Logic for Gradio ---
def run_algoforge_simulation_orchestrator(
    problem_type_selected: str,
    problem_description_text: str,
    initial_hints_text: str,
    user_provided_tests_code: str,
    num_initial_solutions_to_gen: int,
    selected_model_ui_key: str,
    genesis_temp: float, genesis_max_tokens: int,
    critique_temp: float, critique_max_tokens: int,
    evolution_temp: float, evolution_max_tokens: int,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress tracker
):
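    """Run one full AlgoForge Prime™ cycle: genesis, critique/evaluation, champion selection, evolution.

    Returns five strings consumed by the Gradio outputs, in order: initial candidates with
    evaluations (markdown), champion candidate (markdown), evolved artifact (markdown),
    the interaction log, and the AI analysis of the evolved code's test results.
    """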
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
    start_time = time.time()

    # Basic input validation
    if not problem_description_text.strip():
        error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""  # Return 5 values for outputs

    current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
    if not current_model_config or current_model_config["type"] == "none":
        error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""

    log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type_selected}")
    log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")

    # Prepare LLM configurations for each stage
    llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
    llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
    llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
    log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
    initial_raw_solutions = generate_initial_solutions(
        problem_description_text, initial_hints_text, problem_type_selected,
        num_initial_solutions_to_gen, llm_config_genesis
    )
    log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
    for i, sol_text in enumerate(initial_raw_solutions):
        log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")

    # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
    evaluated_candidates_list = []  # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
    for i, candidate_solution_text in enumerate(initial_raw_solutions):
        current_progress = 0.3 + ((i + 1) / num_initial_solutions_to_gen) * 0.35  # Progress for evaluation stage
        progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
        log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
        # The evaluation_engine handles the case where candidate_solution_text itself is an error string
        evaluation_obj = evaluate_solution_candidate(  # type: EvaluationResult
            candidate_solution_text, problem_description_text, problem_type_selected,
            user_provided_tests_code, llm_config_critique
        )
        log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
        log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
        if evaluation_obj.execution_summary:
            log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
        log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
        evaluated_candidates_list.append({
            "id": i + 1,
            "solution_text": candidate_solution_text,  # Store original text, even if it was an error from genesis
            "evaluation_result": evaluation_obj
        })

    # Format display for initial solutions & evaluations
    initial_solutions_display_markdown = []
    for data in evaluated_candidates_list:
        initial_solutions_display_markdown.append(
            f"**Candidate {data['id']}:**\n"
            f"```python\n{data['solution_text']}\n```\n\n"  # Assuming python for display; adjust if problem_type varies widely
            f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
            f"{data['evaluation_result'].critique_text}\n---"
        )

    # --- STAGE 3: SELECTION OF CHAMPION ---
    progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
    log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
    # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., a score of 0).
    # We want a champion that is actually a piece of code/algorithm, not an error message.
    potentially_viable_candidates = [
        cand for cand in evaluated_candidates_list
        if cand["evaluation_result"] and cand["evaluation_result"].score > 0
        and cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
    ]
    if not potentially_viable_candidates:
        final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
        log_entries.append(f" CRITICAL: {final_error_msg}")
        return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""

    potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
    champion_candidate_data = potentially_viable_candidates[0]
    log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
                       f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
                       f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
    champion_display_markdown = (
        f"**Champion Candidate ID: {champion_candidate_data['id']} "
        f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
        f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
        f"**Original Comprehensive Evaluation for this Champion:**\n"
        f"{champion_candidate_data['evaluation_result'].critique_text}"
    )

    # --- STAGE 4: EVOLUTIONARY FORGE ---
    progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
    evolved_solution_code = evolve_solution(
        champion_candidate_data["solution_text"],
        champion_candidate_data["evaluation_result"].critique_text,  # Pass the full critique
        champion_candidate_data["evaluation_result"].score,
        problem_description_text,
        problem_type_selected,
        llm_config_evolution
    )
    log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")

    evolved_solution_display_markdown = ""
    ai_test_analysis_markdown = ""  # For LLM explanation of unit test results of evolved code
    if evolved_solution_code.startswith("ERROR"):
        evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
    else:
        evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
        # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
        if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
            progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
            log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
            # Use a low temperature for this critique to focus on test results rather than creative critique.
            # The critique part here is mostly for consistency; the primary goal is test execution.
            evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
            evolved_code_eval_result = evaluate_solution_candidate(  # type: EvaluationResult
                evolved_solution_code, problem_description_text, problem_type_selected,
                user_provided_tests_code, evolved_critique_config
            )
            evolved_solution_display_markdown += (
                f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
                f"{evolved_code_eval_result.execution_summary}\n"
                f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
            )
            log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
                               f"Summary: {evolved_code_eval_result.execution_summary}")

            # Have the LLM explain the test results of the evolved code
            if evolved_code_eval_result.total_tests > 0:  # Only if tests were run
                progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
                log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
                analysis_user_prompt = format_code_test_analysis_user_prompt(
                    evolved_solution_code,
                    user_provided_tests_code,
                    evolved_code_eval_result.execution_summary  # Pass the summary string
                )
                analysis_system_prompt = get_system_prompt("code_execution_explainer")
                # Use a config for analysis - can be the same as critique or specialized
                llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
                                       "temp": 0.3, "max_tokens": critique_max_tokens + 100}  # A bit more tokens for explanation

                from core.llm_clients import call_huggingface_api, call_gemini_api  # Re-import for clarity or use a dispatcher
                explanation_response_obj = None
                if llm_analysis_config["type"] == "hf":
                    explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
                elif llm_analysis_config["type"] == "google_gemini":
                    explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)

                if explanation_response_obj and explanation_response_obj.success:
                    ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
                    log_entries.append(f" AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
                elif explanation_response_obj:
                    ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
                    log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")

    total_time = time.time() - start_time
    log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
    progress(1.0, desc="Cycle Complete!")

    return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
# --- Gradio UI Definition ---
# (This section is largely similar to the previous app.py, with updated input/output connections)
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
featuring (simulated) unit testing for Python code when tests are provided.
**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
If keys are missing or invalid, corresponding models will be unavailable.
"""
# Determine API status for UI message
ui_token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN is configured or working correctly.** The application will not be able to call any LLMs.</p>"
else:
    if GEMINI_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
    if HF_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
    gr.Markdown(intro_markdown)
    gr.HTML(ui_token_status_md)

    if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
        gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
    else:
        with gr.Row():
            # Input Column
            with gr.Column(scale=2):  # Input column slightly wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dropdown = gr.Dropdown(
                    choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
                    label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
                    info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
                )
                problem_description_textbox = gr.Textbox(
                    lines=5, label="Problem Description / Desired Outcome",
                    placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
                )
                initial_hints_textbox = gr.Textbox(
                    lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
                    placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
                )
                user_tests_textbox = gr.Textbox(
                    lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
                    info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
                )

                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_selection_dropdown = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS_CONFIG.keys()),
                    value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
                    label="Select LLM Core Model",
                    info="Ensure the corresponding API key (Google or HF) is configured in secrets."
                )
                num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")

                with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
                    with gr.Row():
                        genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.")  # Gemini range often 0-1
                        genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
                    with gr.Row():
                        critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
                        critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
                    with gr.Row():
                        evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")

                engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")

            # Output Column
            with gr.Column(scale=3):  # Output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs(elem_id="output_tabs_elem"):
                    with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
                        output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
                        output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
                        output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
                    with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
                        output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")

        # Connect button to the orchestration function
        engage_button.click(
            fn=run_algoforge_simulation_orchestrator,
            inputs=[
                problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
                num_initial_solutions_slider, model_selection_dropdown,
                genesis_temp_slider, genesis_max_tokens_slider,
                critique_temp_slider, critique_max_tokens_slider,
                evolution_temp_slider, evolution_max_tokens_slider
            ],
            outputs=[
                output_initial_solutions_markdown, output_champion_markdown,
                output_evolved_markdown, output_interaction_log_markdown,
                output_ai_test_analysis_markdown
            ]
        )

    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** This is a conceptual, educational demonstration. "
        "The (simulated) unit testing feature is for illustrative purposes. "
        "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
        "LLM outputs always require careful human review and verification."
    )
# --- Entry Point for Running the Gradio App ---
if __name__ == "__main__":
print("="*80)
print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
print(f" Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
print(f" Hugging Face API Configured: {HF_API_CONFIGURED}")
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
print(" CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
print("="*80)
app_demo.launch(debug=True, server_name="0.0.0.0") # server_name="0.0.0.0" is often needed for Docker/Spaces