# algoforge_prime/app.py
import gradio as gr
import os
import time  # For progress updates

from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
from prompts.system_prompts import get_system_prompt
from prompts.prompt_templates import format_code_test_analysis_user_prompt

# --- Application Configuration (Models, Defaults) ---
AVAILABLE_MODELS_CONFIG = {}
UI_DEFAULT_MODEL_KEY = None

# Populate with Gemini models if API is configured
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

# Populate with Hugging Face models if API is configured
if HF_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
        "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"},  # Smaller CodeLlama
    })
    if not UI_DEFAULT_MODEL_KEY:  # If Gemini isn't configured, default to an HF model
        UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

# Absolute fallback if no models could be configured
if not AVAILABLE_MODELS_CONFIG:
    print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
    AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
    UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
    UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0]  # Pick first available if default somehow not set

# --- Main Orchestration Logic for Gradio ---
def run_algoforge_simulation_orchestrator(
    problem_type_selected: str,
    problem_description_text: str,
    initial_hints_text: str,
    user_provided_tests_code: str,
    num_initial_solutions_to_gen: int,
    selected_model_ui_key: str,
    genesis_temp: float, genesis_max_tokens: int,
    critique_temp: float, critique_max_tokens: int,
    evolution_temp: float, evolution_max_tokens: int,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress tracker
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
    start_time = time.time()

    # Basic input validation
    if not problem_description_text.strip():
        error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""  # Return 5 values to match the Gradio outputs

    current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
    if not current_model_config or current_model_config["type"] == "none":
        error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""

    log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type_selected}")
    log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")

    # Prepare LLM configurations for each stage
    llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
    llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
    llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
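
    # The three stage configs above are plain dicts with the keys "type", "model_id",
    # "temp", and "max_tokens"; the engine modules called below are assumed to read
    # exactly this shape, e.g.:
    #   {"type": "google_gemini", "model_id": "gemini-1.5-flash-latest", "temp": 0.7, "max_tokens": 768}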

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
    log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
    initial_raw_solutions = generate_initial_solutions(
        problem_description_text, initial_hints_text, problem_type_selected,
        num_initial_solutions_to_gen, llm_config_genesis
    )
    log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
    for i, sol_text in enumerate(initial_raw_solutions):
        log_entries.append(f"  Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")

    # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
    evaluated_candidates_list = []  # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
    for i, candidate_solution_text in enumerate(initial_raw_solutions):
        current_progress = 0.3 + ((i + 1) / num_initial_solutions_to_gen) * 0.35  # Progress within the evaluation stage
        progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
        log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
        # The evaluation engine handles the case where candidate_solution_text is itself an error string
        evaluation_obj = evaluate_solution_candidate(  # type: EvaluationResult
            candidate_solution_text, problem_description_text, problem_type_selected,
            user_provided_tests_code, llm_config_critique
        )
        log_entries.append(f"  Final Combined Score: {evaluation_obj.score}/10")
        log_entries.append(f"  Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
        if evaluation_obj.execution_summary:
            log_entries.append(f"  Execution Summary: {evaluation_obj.execution_summary}")
        log_entries.append(f"  LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
        evaluated_candidates_list.append({
            "id": i + 1,
            "solution_text": candidate_solution_text,  # Store the original text, even if it was an error from genesis
            "evaluation_result": evaluation_obj
        })
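
    # Note: the EvaluationResult objects gathered above are assumed to expose the
    # attributes used throughout this function: score, passed_tests, total_tests,
    # execution_summary, and critique_text.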

    # Format display for initial solutions & evaluations
    initial_solutions_display_markdown = []
    for data in evaluated_candidates_list:
        initial_solutions_display_markdown.append(
            f"**Candidate {data['id']}:**\n"
            f"```python\n{data['solution_text']}\n```\n\n"  # Assuming python for display, adjust if problem_type varies widely
            f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
            f"{data['evaluation_result'].critique_text}\n---"
        )

    # --- STAGE 3: SELECTION OF CHAMPION ---
    progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
    log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
    # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., score of 0 from evaluation).
    # We want to select a champion that is actually a piece of code/algorithm, not an error message.
    potentially_viable_candidates = [
        cand for cand in evaluated_candidates_list
        if cand["evaluation_result"] and cand["evaluation_result"].score > 0
        and cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
    ]
    if not potentially_viable_candidates:
        final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
        log_entries.append(f"  CRITICAL: {final_error_msg}")
        return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""

    potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
    champion_candidate_data = potentially_viable_candidates[0]
    log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
                       f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
                       f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")

    champion_display_markdown = (
        f"**Champion Candidate ID: {champion_candidate_data['id']} "
        f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
        f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
        f"**Original Comprehensive Evaluation for this Champion:**\n"
        f"{champion_candidate_data['evaluation_result'].critique_text}"
    )

    # --- STAGE 4: EVOLUTIONARY FORGE ---
    progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
    evolved_solution_code = evolve_solution(
        champion_candidate_data["solution_text"],
        champion_candidate_data["evaluation_result"].critique_text,  # Pass the full critique
        champion_candidate_data["evaluation_result"].score,
        problem_description_text,
        problem_type_selected,
        llm_config_evolution
    )
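    # evolve_solution is expected to return either the evolved solution text or a
    # string prefixed with "ERROR" on failure; the branching below relies on that contract.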
    log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")

    evolved_solution_display_markdown = ""
    ai_test_analysis_markdown = ""  # For the LLM's explanation of unit test results on the evolved code

    if evolved_solution_code.startswith("ERROR"):
        evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
    else:
        evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"

        # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
        if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
            progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
            log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
            # Use a low temperature for this critique so it focuses on test results rather than creative critique.
            # The critique here is mostly for consistency; the primary goal is test execution.
            evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
            evolved_code_eval_result = evaluate_solution_candidate(  # type: EvaluationResult
                evolved_solution_code, problem_description_text, problem_type_selected,
                user_provided_tests_code, evolved_critique_config
            )
            evolved_solution_display_markdown += (
                f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
                f"{evolved_code_eval_result.execution_summary}\n"
                f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
            )
            log_entries.append(f"  Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
                               f"Summary: {evolved_code_eval_result.execution_summary}")

            # Ask the LLM to explain the test results of the evolved code
            if evolved_code_eval_result.total_tests > 0:  # Only if tests were run
                progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
                log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
                analysis_user_prompt = format_code_test_analysis_user_prompt(
                    evolved_solution_code,
                    user_provided_tests_code,
                    evolved_code_eval_result.execution_summary  # Pass the summary string
                )
                analysis_system_prompt = get_system_prompt("code_execution_explainer")
                # Use a config for analysis - can be the same as critique or specialized
                llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
                                       "temp": 0.3, "max_tokens": critique_max_tokens + 100}  # A bit more tokens for the explanation
                from core.llm_clients import call_huggingface_api, call_gemini_api  # Re-import for clarity, or use a dispatcher (see the sketch after this function)
                explanation_response_obj = None
                if llm_analysis_config["type"] == "hf":
                    explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
                elif llm_analysis_config["type"] == "google_gemini":
                    explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
                if explanation_response_obj and explanation_response_obj.success:
                    ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
                    log_entries.append(f"  AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
                elif explanation_response_obj:
                    ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
                    log_entries.append(f"  AI Test Analysis Error: {explanation_response_obj.error}")

    total_time = time.time() - start_time
    log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
    progress(1.0, desc="Cycle Complete!")

    return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
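

# Illustrative sketch only (not wired into the flow above): a small dispatcher that
# could replace the per-provider branching used inside the orchestrator. The helper
# name `dispatch_llm_call` is hypothetical; it assumes the call_huggingface_api /
# call_gemini_api signatures used above (prompt, model_id, temperature, max_tokens,
# system_prompt) and a config dict carrying "type", "model_id", "temp", "max_tokens".
def dispatch_llm_call(user_prompt: str, llm_config: dict, system_prompt: str = None):
    from core.llm_clients import call_huggingface_api, call_gemini_api
    if llm_config["type"] == "hf":
        return call_huggingface_api(user_prompt, llm_config["model_id"], llm_config["temp"], llm_config["max_tokens"], system_prompt)
    if llm_config["type"] == "google_gemini":
        return call_gemini_api(user_prompt, llm_config["model_id"], llm_config["temp"], llm_config["max_tokens"], system_prompt)
    return None  # Unknown provider type; callers treat a missing response as a failure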

# --- Gradio UI Definition ---
# (This section is largely similar to the previous app.py, with updated input/output connections)
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
featuring (simulated) unit testing for Python code if provided.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.

If keys are missing or invalid, the corresponding models will be unavailable.
"""

# Determine API status for the UI message
ui_token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
else:
    if GEMINI_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
    if HF_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"

with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
    gr.Markdown(intro_markdown)
    gr.HTML(ui_token_status_md)

    if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
        gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
    else:
        with gr.Row():
            # Input Column
            with gr.Column(scale=2):  # Input column slightly wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dropdown = gr.Dropdown(
                    choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
                    label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
                    info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
                )
                problem_description_textbox = gr.Textbox(
                    lines=5, label="Problem Description / Desired Outcome",
                    placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates the factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
                )
                initial_hints_textbox = gr.Textbox(
                    lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
                    placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
                )
                user_tests_textbox = gr.Textbox(
                    lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
                    info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
                )

                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_selection_dropdown = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS_CONFIG.keys()),
                    value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
                    label="Select LLM Core Model",
                    info="Ensure the corresponding API key (Google or HF) is configured in Space Secrets."
                )
                num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")

                with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
                    with gr.Row():
                        genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, lower = more deterministic.")  # Gemini range is typically 0-1
                        genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
                    with gr.Row():
                        critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
                        critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
                    with gr.Row():
                        evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")

                engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")

            # Output Column
            with gr.Column(scale=3):  # Output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs(elem_id="output_tabs_elem"):
                    with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
                        output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
                        output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
                        output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
                    with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
                        output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")

        # Connect the button to the orchestration function
        engage_button.click(
            fn=run_algoforge_simulation_orchestrator,
            inputs=[
                problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
                num_initial_solutions_slider, model_selection_dropdown,
                genesis_temp_slider, genesis_max_tokens_slider,
                critique_temp_slider, critique_max_tokens_slider,
                evolution_temp_slider, evolution_max_tokens_slider
            ],
            outputs=[
                output_initial_solutions_markdown, output_champion_markdown,
                output_evolved_markdown, output_interaction_log_markdown,
                output_ai_test_analysis_markdown
            ]
        )

    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** This is a conceptual, educational demonstration. "
        "The (simulated) unit testing feature is for illustrative purposes. "
        "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
        "LLM outputs always require careful human review and verification."
    )

# --- Entry Point for Running the Gradio App ---
if __name__ == "__main__":
    print("=" * 80)
    print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
    print(f"  Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
    print(f"  Hugging Face API Configured: {HF_API_CONFIGURED}")
    if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
        print("  CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
    print(f"  UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
    print(f"  Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
    print("=" * 80)
    app_demo.launch(debug=True, server_name="0.0.0.0")  # server_name="0.0.0.0" is often needed for Docker/Spaces