Update app.py
app.py CHANGED
@@ -1,304 +1,386 @@
[Previous version of app.py — 304 lines. The removed side of this diff is truncated in the rendered view; only fragments survive. It contained the earlier layout of the same application: the core imports (llm_clients, generation_engine, evaluation_engine, evolution_engine), the Gemini/Hugging Face model configuration, a run_algoforge_simulation() orchestrator covering the Genesis, Critique & Evaluation, Selection, and Evolution stages, the Gradio UI, and the __main__ entry point. The complete updated app.py (386 lines) follows.]
# algoforge_prime/app.py
import gradio as gr
import os
import time # For progress updates

# --- Core Logic Imports ---
# Initialize clients first to ensure API keys are loaded before other modules use them.
from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
initialize_all_clients() # Call initialization once when the app starts

from core.generation_engine import generate_initial_solutions
from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult # Class for typed results
from core.evolution_engine import evolve_solution
from prompts.system_prompts import get_system_prompt # For specific roles like test explainer
from prompts.prompt_templates import format_code_test_analysis_user_prompt

# --- Application Configuration (Models, Defaults) ---
AVAILABLE_MODELS_CONFIG = {}
UI_DEFAULT_MODEL_KEY = None

# Populate with Gemini models if API is configured
if GEMINI_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
        "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
    })
    UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"

# Populate with Hugging Face models if API is configured
if HF_API_CONFIGURED:
    AVAILABLE_MODELS_CONFIG.update({
        "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
        "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
        "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"}, # Smaller CodeLlama
    })
    if not UI_DEFAULT_MODEL_KEY: # If Gemini isn't configured, default to an HF model
        UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"

# Absolute fallback if no models could be configured
if not AVAILABLE_MODELS_CONFIG:
    print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
    AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
    UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
    UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0] # Pick first available if default somehow not set

# --- Main Orchestration Logic for Gradio ---
def run_algoforge_simulation_orchestrator(
    problem_type_selected: str,
    problem_description_text: str,
    initial_hints_text: str,
    user_provided_tests_code: str,
    num_initial_solutions_to_gen: int,
    selected_model_ui_key: str,
    genesis_temp: float, genesis_max_tokens: int,
    critique_temp: float, critique_max_tokens: int,
    evolution_temp: float, evolution_max_tokens: int,
    progress=gr.Progress(track_tqdm=True) # Gradio progress tracker
):
    progress(0, desc="Initializing AlgoForge Prime™...")
    log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
    start_time = time.time()

    # Basic input validation
    if not problem_description_text.strip():
        error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), "" # Return 5 values for outputs

    current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
    if not current_model_config or current_model_config["type"] == "none":
        error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
        log_entries.append(error_msg)
        return error_msg, "", "", "\n".join(log_entries), ""

    log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
    log_entries.append(f"Problem Type: {problem_type_selected}")
    log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")

    # Prepare LLM configurations for each stage
    llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
    llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
    llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}

    # --- STAGE 1: GENESIS ---
    progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
    log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")

    initial_raw_solutions = generate_initial_solutions(
        problem_description_text, initial_hints_text, problem_type_selected,
        num_initial_solutions_to_gen, llm_config_genesis
    )
    log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
    for i, sol_text in enumerate(initial_raw_solutions):
        log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")

    # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
    progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
    log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")

    evaluated_candidates_list = [] # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}

    for i, candidate_solution_text in enumerate(initial_raw_solutions):
        current_progress = 0.3 + ((i + 1) / num_initial_solutions_to_gen) * 0.35 # Progress for evaluation stage
        progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
        log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")

        # The evaluation_engine handles if candidate_solution_text itself is an error string
        evaluation_obj = evaluate_solution_candidate( # type: EvaluationResult
            candidate_solution_text, problem_description_text, problem_type_selected,
            user_provided_tests_code, llm_config_critique
        )

        log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
        log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
        if evaluation_obj.execution_summary: log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
        log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")

        evaluated_candidates_list.append({
            "id": i + 1,
            "solution_text": candidate_solution_text, # Store original text, even if it was an error from genesis
            "evaluation_result": evaluation_obj
        })

    # Format display for initial solutions & evaluations
    initial_solutions_display_markdown = []
    for data in evaluated_candidates_list:
        initial_solutions_display_markdown.append(
            f"**Candidate {data['id']}:**\n"
            f"```python\n{data['solution_text']}\n```\n\n" # Assuming python for display, adjust if problem_type varies widely
            f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
            f"{data['evaluation_result'].critique_text}\n---"
        )

    # --- STAGE 3: SELECTION OF CHAMPION ---
    progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
    log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")

    # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., score of 0 from evaluation)
    # We want to select a champion that is actually a piece of code/algorithm, not an error message.
    potentially_viable_candidates = [
        cand for cand in evaluated_candidates_list
        if cand["evaluation_result"] and cand["evaluation_result"].score > 0 and \
           cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
    ]

    if not potentially_viable_candidates:
        final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
        log_entries.append(f" CRITICAL: {final_error_msg}")
        return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""

    potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
    champion_candidate_data = potentially_viable_candidates[0]

    log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
                       f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
                       f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")

    champion_display_markdown = (
        f"**Champion Candidate ID: {champion_candidate_data['id']} "
        f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
        f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
        f"**Original Comprehensive Evaluation for this Champion:**\n"
        f"{champion_candidate_data['evaluation_result'].critique_text}"
    )

    # --- STAGE 4: EVOLUTIONARY FORGE ---
    progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
    log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")

    evolved_solution_code = evolve_solution(
        champion_candidate_data["solution_text"],
        champion_candidate_data["evaluation_result"].critique_text, # Pass the full critique
        champion_candidate_data["evaluation_result"].score,
        problem_description_text,
        problem_type_selected,
        llm_config_evolution
    )
    log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")

    evolved_solution_display_markdown = ""
    ai_test_analysis_markdown = "" # For LLM explanation of unit test results of evolved code

    if evolved_solution_code.startswith("ERROR"):
        evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
    else:
        evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"

        # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
        if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
            progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
            log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")

            # Use a low temperature for this critique to focus on test results rather than creative critique
            # The critique part here is mostly for consistency, primary goal is test execution.
            evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}

            evolved_code_eval_result = evaluate_solution_candidate( # type: EvaluationResult
                evolved_solution_code, problem_description_text, problem_type_selected,
                user_provided_tests_code, evolved_critique_config
            )

            evolved_solution_display_markdown += (
                f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
                f"{evolved_code_eval_result.execution_summary}\n"
                f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
            )
            log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
                               f"Summary: {evolved_code_eval_result.execution_summary}")

            # Get LLM to explain the test results of the evolved code
            if evolved_code_eval_result.total_tests > 0: # Only if tests were run
                progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
                log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
                analysis_user_prompt = format_code_test_analysis_user_prompt(
                    evolved_solution_code,
                    user_provided_tests_code,
                    evolved_code_eval_result.execution_summary # Pass the summary string
                )
                analysis_system_prompt = get_system_prompt("code_execution_explainer")

                # Use a config for analysis - can be same as critique or specialized
                llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
                                       "temp": 0.3, "max_tokens": critique_max_tokens + 100} # A bit more tokens for explanation

                from core.llm_clients import call_huggingface_api, call_gemini_api # Re-import for clarity or use a dispatcher

                explanation_response_obj = None
                if llm_analysis_config["type"] == "hf":
                    explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
                elif llm_analysis_config["type"] == "google_gemini":
                    explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)

                if explanation_response_obj and explanation_response_obj.success:
                    ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
                    log_entries.append(f" AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
                elif explanation_response_obj:
                    ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
                    log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")

    total_time = time.time() - start_time
    log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
    progress(1.0, desc="Cycle Complete!")

    return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown


# --- Gradio UI Definition ---
# (This section is largely similar to the previous app.py, with updated input/output connections)

intro_markdown = """
# ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
featuring (simulated) unit testing for Python code if provided.

**API Keys Required in Space Secrets:**
- `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
- `HF_TOKEN` (Secondary): For Hugging Face hosted models.
If keys are missing or invalid, corresponding models will be unavailable.
"""

# Determine API status for UI message
ui_token_status_md = ""
if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
    ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
else:
    if GEMINI_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"

    if HF_API_CONFIGURED:
        ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
    else:
        ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"


with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
    gr.Markdown(intro_markdown)
    gr.HTML(ui_token_status_md)

    if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
        gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
    else:
        with gr.Row():
            # Input Column
            with gr.Column(scale=2): # Input column slightly wider
                gr.Markdown("## 💡 1. Define the Challenge")
                problem_type_dropdown = gr.Dropdown(
                    choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
                    label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
                    info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
                )
                problem_description_textbox = gr.Textbox(
                    lines=5, label="Problem Description / Desired Outcome",
                    placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
                )
                initial_hints_textbox = gr.Textbox(
                    lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
                    placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
                )
                user_tests_textbox = gr.Textbox(
                    lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
                    placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
                    info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
                )

                gr.Markdown("## ⚙️ 2. Configure The Forge")
                model_selection_dropdown = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS_CONFIG.keys()),
                    value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
                    label="Select LLM Core Model",
                    info="Ensure the corresponding API key (Google or HF) is configured in secrets."
                )
                num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")

                with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
                    with gr.Row():
                        genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.") # Gemini range often 0-1
                        genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
                    with gr.Row():
                        critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
                        critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
                    with gr.Row():
                        evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
                        evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")

                engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")

            # Output Column
            with gr.Column(scale=3): # Output column wider
                gr.Markdown("## 🔥 3. The Forge's Output")
                with gr.Tabs(elem_id="output_tabs_elem"):
                    with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
                        output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
                    with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
                        output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
                    with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
                        output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
                        output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
                    with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
                        output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")

        # Connect button to the orchestration function
        engage_button.click(
            fn=run_algoforge_simulation_orchestrator,
            inputs=[
                problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
                num_initial_solutions_slider, model_selection_dropdown,
                genesis_temp_slider, genesis_max_tokens_slider,
                critique_temp_slider, critique_max_tokens_slider,
                evolution_temp_slider, evolution_max_tokens_slider
            ],
            outputs=[
                output_initial_solutions_markdown, output_champion_markdown,
                output_evolved_markdown, output_interaction_log_markdown,
                output_ai_test_analysis_markdown
            ]
        )

    gr.Markdown("---")
    gr.Markdown(
        "**Disclaimer:** This is a conceptual, educational demonstration. "
        "The (simulated) unit testing feature is for illustrative purposes. "
        "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
        "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
        "LLM outputs always require careful human review and verification."
    )

# --- Entry Point for Running the Gradio App ---
if __name__ == "__main__":
    print("="*80)
    print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
    print(f" Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
    print(f" Hugging Face API Configured: {HF_API_CONFIGURED}")
    if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
        print(" CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
    print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
    print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
    print("="*80)

    app_demo.launch(debug=True, server_name="0.0.0.0") # server_name="0.0.0.0" is often needed for Docker/Spaces
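
Note for readers without the rest of the repository: app.py relies on a small interface from the core modules, which are not part of this diff. The sketch below is reconstructed only from the attribute accesses in the file above (`score`, `passed_tests`, `total_tests`, `execution_summary`, `critique_text` on EvaluationResult, and `success`/`text`/`error` on the objects returned by call_gemini_api / call_huggingface_api). It is an illustrative assumption, not the actual contents of core/evaluation_engine.py or core/llm_clients.py, which may define these types differently.

# interface_sketch.py (hypothetical, not part of this commit)
from dataclasses import dataclass
from typing import Optional

@dataclass
class EvaluationResult:
    # Minimal shape app.py assumes from core/evaluation_engine.py
    score: int = 0                       # combined critique + test score, reported as N/10
    passed_tests: int = 0                # number of (simulated) unit tests that passed
    total_tests: int = 0                 # number of (simulated) unit tests run
    execution_summary: Optional[str] = None  # human-readable summary of the test run
    critique_text: str = ""              # full LLM critique shown in the UI

@dataclass
class LLMResponse:
    # Minimal shape app.py assumes from call_gemini_api / call_huggingface_api
    success: bool
    text: str = ""                       # model output when success is True
    error: str = ""                      # error message when success is False

Keeping these as plain value objects is what lets app.py pass them between the evaluation, selection, and evolution stages without caring which LLM backend produced them.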