# algoforge_prime/app.py
import gradio as gr
import os
# Initialize core components first (important for loading API keys etc.)
# This needs to happen before other core modules try to use the status
from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
initialize_clients() # Explicitly initialize
from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
from core.evolution_engine import evolve_solution
# from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
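# NOTE (assumed interfaces, inferred from how this module uses them; the authoritative
# definitions live in the core/ package and may differ):
#   - EvaluationResult is expected to expose: .score (int, 0-10), .critique (str),
#     .passed_tests (int), .total_tests (int), and .execution_error (str or None).
#   - The llm_*_config dicts built below are expected to look like:
#     {"type": "google_gemini" | "hf", "model_id": "<model id>", "temp": <float>, "max_tokens": <int>}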
# --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
AVAILABLE_MODELS = {}
DEFAULT_MODEL_KEY = None
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

if HF_API_CONFIGURED:
    AVAILABLE_MODELS.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
    })
    if not DEFAULT_MODEL_KEY:
        DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

if not AVAILABLE_MODELS:
    AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
    DEFAULT_MODEL_KEY = "No Models Available"
elif not DEFAULT_MODEL_KEY:
    DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
# --- Main Orchestration Logic ---
def run_algoforge_simulation(
    problem_type, problem_description, initial_hints, user_tests_string,  # New input: user_tests_string
    num_initial_solutions, selected_model_key,
    gen_temp, gen_max_tokens,
    eval_temp, eval_max_tokens,
    evolve_temp, evolve_max_tokens,
    progress=gr.Progress(track_tqdm=True)  # Gradio progress bar
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = ["**AlgoForge Prime™ Cycle Starting...**"]

    if not problem_description:
        return "ERROR: Problem Description is mandatory.", "", "", "", ""

    model_config = AVAILABLE_MODELS.get(selected_model_key)
    if not model_config or model_config["type"] == "none":
        return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""

    log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
    log_entries.append("\n**Stage 1: Genesis Engine**")
    llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
    initial_solution_texts = generate_initial_solutions(
        problem_description, initial_hints, problem_type,
        num_initial_solutions, llm_gen_config
    )
    log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
    for i, sol_text in enumerate(initial_solution_texts):
        log_entries.append(f"  Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")

    valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
    if not valid_initial_solutions:
        error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
        return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
    # --- STAGE 2: CRITIQUE & EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
    evaluated_candidates_data = []
    llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}

    for i, sol_text in enumerate(initial_solution_texts):  # Evaluate all candidates, even errors, so the error is surfaced in the UI
        progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
        log_entries.append(f"\nEvaluating Candidate {i+1}:")
        if not sol_text or sol_text.startswith("ERROR"):  # Guard against empty/None candidates as well as explicit generation errors
            eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
            log_entries.append(f"  Skipping detailed evaluation for error: {sol_text}")
        else:
            eval_res = evaluate_solution_candidate(
                sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
            )
            log_entries.append(f"  LLM Critique & Test Score: {eval_res.score}/10")
            log_entries.append(f"  Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
            if eval_res.execution_error:
                log_entries.append(f"  Execution Error: {eval_res.execution_error}")
            log_entries.append(f"  Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
        evaluated_candidates_data.append({
            "id": i + 1,
            "solution_text": sol_text,
            "evaluation": eval_res
        })
    # Format display for initial solutions
    initial_solutions_display_md = []
    for data in evaluated_candidates_data:
        initial_solutions_display_md.append(
            f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
            f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
        )

    # --- STAGE 3: SELECTION ---
    progress(0.75, desc="Stage 3: Selecting Champion...")
    # Filter out candidates that were errors from Genesis before sorting by score
    valid_evaluated_candidates = [
        cand for cand in evaluated_candidates_data
        if cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
    ]
    if not valid_evaluated_candidates:
        return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""

    valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
    best_candidate_data = valid_evaluated_candidates[0]
    log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")

    best_solution_display_md = (
        f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
        f"```python\n{best_candidate_data['solution_text']}\n```\n"
        f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
    )
    # --- STAGE 4: EVOLUTION ---
    progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**Stage 4: Evolutionary Forge**")
    llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
    evolved_solution_text = evolve_solution(
        best_candidate_data["solution_text"],
        str(best_candidate_data["evaluation"].critique),  # Pass the full critique, including test results
        best_candidate_data["evaluation"].score,
        problem_description,
        problem_type,
        llm_evolve_config
    )
    log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")

    evolved_solution_display_md = ""
    final_thoughts_md = ""  # Reserved for an LLM explanation of the unit-test results, if needed

    if not evolved_solution_text or evolved_solution_text.startswith("ERROR"):
        evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
    else:
        evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"

        # Optionally, re-evaluate the evolved solution with unit tests if provided
        if "python" in problem_type.lower() and user_tests_string:
            progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
            log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
            # Uses a neutral, low-temperature LLM config here; it could also be a separate config.
            # This evaluation is primarily for the test results, not another LLM critique of the evolved code.
            evolved_eval_res = evaluate_solution_candidate(
                evolved_solution_text, problem_description, problem_type, user_tests_string,
                {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens}  # Low temp for focused test eval
            )
            evolved_solution_display_md += (
                f"\n\n**Post-Evolution Test Results (Simulated):**\n"
                f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
            )
            if evolved_eval_res.execution_error:
                evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
            log_entries.append(f"  Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")

            # A follow-up LLM call could explain the test results of the evolved code, e.g.:
            # progress(0.95, desc="Explaining Evolved Code Test Results...")
            # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
            # explain_sys_prompt = get_system_prompt("code_execution_explainer")
            # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config)  # Needs a simplified dispatcher, or reuse the full one
            # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"

    log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
    progress(1.0, desc="Cycle Complete!")
    return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
# --- GRADIO UI (largely similar, but with a new input for user tests) ---
intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
featuring basic (simulated) unit testing for Python code.
**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
"""
token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
else:
    if GEMINI_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
    if HF_API_CONFIGURED:
        token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
    else:
        token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
    gr.Markdown(intro_markdown)
    gr.HTML(token_status_md)

    if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
        gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
    else:
        with gr.Row():
            with gr.Column(scale=2):  # Made input column wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dd = gr.Dropdown(
                    ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
                    label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
                )
                problem_desc_tb = gr.Textbox(
                    lines=4, label="Problem Description / Desired Outcome",
                    placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
                )
                initial_hints_tb = gr.Textbox(
                    lines=2, label="Initial Thoughts / Constraints (Optional)",
                    placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
                )
                # NEW INPUT for User Tests
                user_tests_tb = gr.Textbox(
                    lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
                    info="For 'Python Algorithm with Tests' type. Ignored otherwise."
                )
gr.Markdown("## ⚙️ 2. Configure The Forge")
model_select_dd = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
label="Select LLM Core Model"
)
num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs
with gr.Accordion("Advanced LLM Parameters", open=False):
# ... (temp and max_tokens sliders - same as before) ...
with gr.Row():
gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
with gr.Row():
eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
with gr.Row():
evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
            with gr.Column(scale=3):  # Made output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs():
                    with gr.TabItem("📜 Genesis Candidates & Evaluations"):
                        output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
                        output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
                        output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests")  # Optional separate output
                    with gr.TabItem("🛠️ Interaction Log (Dev View)"):
                        output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")

        outputs_list = [
            output_initial_solutions_md, output_best_solution_md,
            output_evolved_solution_md, output_interaction_log_md,
            gr.Markdown()  # Placeholder for final_thoughts_md if you add it as a separate component
        ]
        submit_btn.click(
            fn=run_algoforge_simulation,
            inputs=[
                problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb,  # Added user_tests_tb
                num_solutions_slider, model_select_dd,
                gen_temp_slider, gen_max_tokens_slider,
                eval_temp_slider, eval_max_tokens_slider,
                evolve_temp_slider, evolve_max_tokens_slider
            ],
            outputs=outputs_list
        )

    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Real sandboxing is complex and critical for safety."
    )
# --- Entry Point ---
if __name__ == "__main__":
print("="*80)
print("AlgoForge Prime™ (Modular Version) Starting...")
# ... (startup print messages for API key status - same as before) ...
print(f"UI default model key: {DEFAULT_MODEL_KEY}")
print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
print("="*80)
demo.launch(debug=True, server_name="0.0.0.0") |