mgbam committed
Commit bebdc57 · verified · 1 parent: 959fea7

Update app.py

Files changed (1)
  1. app.py +299 -217
app.py CHANGED
@@ -1,304 +1,386 @@
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
 
4
 
5
- # Initialize core components first (important for loading API keys etc.)
6
- # This needs to happen before other core modules try to use the status
7
- from core.llm_clients import initialize_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
8
- initialize_clients() # Explicitly initialize
9
 
10
  from core.generation_engine import generate_initial_solutions
11
- from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult
12
  from core.evolution_engine import evolve_solution
13
- # from prompts.system_prompts import get_system_prompt # Might not be needed directly here if core modules handle it
 
14
 
15
- # --- MODEL DEFINITIONS (can also be moved to a config file/module later) ---
16
- AVAILABLE_MODELS = {}
17
- DEFAULT_MODEL_KEY = None
18
 
 
19
  if GEMINI_API_CONFIGURED:
20
- AVAILABLE_MODELS.update({
21
  "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
22
  "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
23
  })
24
- DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
25
 
 
26
  if HF_API_CONFIGURED:
27
- AVAILABLE_MODELS.update({
28
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
29
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
 
30
  })
31
- if not DEFAULT_MODEL_KEY:
32
- DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
33
-
34
- if not AVAILABLE_MODELS:
35
- AVAILABLE_MODELS["No Models Available"] = {"id": "dummy", "type": "none"}
36
- DEFAULT_MODEL_KEY = "No Models Available"
37
- elif not DEFAULT_MODEL_KEY:
38
- DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS.keys())[0]
39
-
40
-
41
- # --- Main Orchestration Logic ---
42
- def run_algoforge_simulation(
43
- problem_type, problem_description, initial_hints, user_tests_string, # New input: user_tests_string
44
- num_initial_solutions, selected_model_key,
45
- gen_temp, gen_max_tokens,
46
- eval_temp, eval_max_tokens,
47
- evolve_temp, evolve_max_tokens,
48
- progress=gr.Progress(track_tqdm=True) # Gradio progress bar
49
  ):
50
  progress(0, desc="Initializing AlgoForge Prime™...")
51
- log_entries = [f"**AlgoForge Prime™ Cycle Starting...**"]
52
-
53
- if not problem_description:
54
- return "ERROR: Problem Description is mandatory.", "", "", "", ""
55
-
56
- model_config = AVAILABLE_MODELS.get(selected_model_key)
57
- if not model_config or model_config["type"] == "none":
58
- return f"ERROR: No valid model selected ('{selected_model_key}'). Check API key configs.", "", "", "", ""
59
-
60
- log_entries.append(f"Selected Model: {selected_model_key} (Type: {model_config['type']}, ID: {model_config['id']})")
61
- log_entries.append(f"Problem Type: {problem_type}, User Tests Provided: {'Yes' if user_tests_string else 'No'}")
62
 
63
  # --- STAGE 1: GENESIS ---
64
- progress(0.1, desc="Stage 1: Genesis Engine - Generating Solutions...")
65
- log_entries.append("\n**Stage 1: Genesis Engine**")
66
 
67
- llm_gen_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": gen_temp, "max_tokens": gen_max_tokens}
68
- initial_solution_texts = generate_initial_solutions(
69
- problem_description, initial_hints, problem_type,
70
- num_initial_solutions, llm_gen_config
71
  )
72
- log_entries.append(f"Generated {len(initial_solution_texts)} raw solution candidates.")
73
- for i, sol_text in enumerate(initial_solution_texts):
74
- log_entries.append(f" Candidate {i+1} (Snippet): {str(sol_text)[:100]}...")
75
-
76
 
77
- valid_initial_solutions = [s for s in initial_solution_texts if s and not s.startswith("ERROR")]
78
- if not valid_initial_solutions:
79
- error_summary = "\n".join(set(s for s in initial_solution_texts if s and s.startswith("ERROR")))
80
- return f"No valid solutions generated by Genesis Engine. Errors:\n{error_summary}", "", "", "\n".join(log_entries), ""
81
-
82
- # --- STAGE 2: CRITIQUE & EVALUATION ---
83
  progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
84
- log_entries.append("\n**Stage 2: Critique Crucible & Automated Evaluation**")
85
 
86
- evaluated_candidates_data = []
87
- llm_eval_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": eval_temp, "max_tokens": eval_max_tokens}
88
-
89
- for i, sol_text in enumerate(initial_solution_texts): # Evaluate all, even errors, to show the error
90
- progress(0.3 + (i / num_initial_solutions) * 0.4, desc=f"Evaluating Candidate {i+1}...")
91
- log_entries.append(f"\nEvaluating Candidate {i+1}:")
92
- if sol_text.startswith("ERROR"):
93
- eval_res = EvaluationResult(score=0, critique=f"Candidate was an error from Genesis: {sol_text}")
94
- log_entries.append(f" Skipping detailed evaluation for error: {sol_text}")
95
- else:
96
- eval_res = evaluate_solution_candidate(
97
- sol_text, problem_description, problem_type, user_tests_string, llm_eval_config
98
- )
99
- log_entries.append(f" LLM Critique & Test Score: {eval_res.score}/10")
100
- log_entries.append(f" Test Results: {eval_res.passed_tests}/{eval_res.total_tests} passed.")
101
- if eval_res.execution_error: log_entries.append(f" Execution Error: {eval_res.execution_error}")
102
- log_entries.append(f" Full Critique (Snippet): {str(eval_res.critique)[:150]}...")
103
 
104
- evaluated_candidates_data.append({
105
  "id": i + 1,
106
- "solution_text": sol_text,
107
- "evaluation": eval_res
108
  })
109
 
110
- # Format display for initial solutions
111
- initial_solutions_display_md = []
112
- for data in evaluated_candidates_data:
113
- initial_solutions_display_md.append(
114
- f"**Candidate {data['id']}:**\n```python\n{data['solution_text']}\n```\n"
115
- f"**Evaluation Verdict (Score: {data['evaluation'].score}/10):**\n{data['evaluation'].critique}\n---"
 
 
116
  )
117
 
118
- # --- STAGE 3: SELECTION ---
119
- progress(0.75, desc="Stage 3: Selecting Champion...")
120
- # Filter out candidates that were errors from genesis before sorting by score
121
- valid_evaluated_candidates = [cand for cand in evaluated_candidates_data if not cand['solution_text'].startswith("ERROR")]
122
- if not valid_evaluated_candidates:
123
- return "\n\n".join(initial_solutions_display_md), "No valid candidates to select from after evaluation.", "", "\n".join(log_entries), ""
124
-
125
- valid_evaluated_candidates.sort(key=lambda x: x["evaluation"].score, reverse=True)
126
- best_candidate_data = valid_evaluated_candidates[0]
127
- log_entries.append(f"\n**Stage 3: Champion Selected**\nCandidate {best_candidate_data['id']} chosen with score {best_candidate_data['evaluation'].score}/10.")
128
-
129
- best_solution_display_md = (
130
- f"**Champion Candidate {best_candidate_data['id']} (Original Score: {best_candidate_data['evaluation'].score}/10):**\n"
131
- f"```python\n{best_candidate_data['solution_text']}\n```\n"
132
- f"**Original Comprehensive Evaluation:**\n{best_candidate_data['evaluation'].critique}"
133
  )
134
 
135
- # --- STAGE 4: EVOLUTION ---
136
- progress(0.8, desc="Stage 4: Evolutionary Forge - Refining Champion...")
137
- log_entries.append("\n**Stage 4: Evolutionary Forge**")
138
- llm_evolve_config = {"type": model_config["type"], "model_id": model_config["id"], "temp": evolve_temp, "max_tokens": evolve_max_tokens}
139
 
140
- evolved_solution_text = evolve_solution(
141
- best_candidate_data["solution_text"],
142
- str(best_candidate_data["evaluation"].critique), # Pass the full critique including test results
143
- best_candidate_data["evaluation"].score,
144
- problem_description,
145
- problem_type,
146
- llm_evolve_config
147
  )
148
- log_entries.append(f"Evolved solution text (Snippet): {str(evolved_solution_text)[:150]}...")
149
 
150
- evolved_solution_display_md = ""
151
- final_thoughts_md = "" # For LLM explanation of unit test results if needed
152
 
153
- if evolved_solution_text.startswith("ERROR"):
154
- evolved_solution_display_md = f"**Evolution Failed:**\n{evolved_solution_text}"
155
  else:
156
- evolved_solution_display_md = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_text}\n```"
157
- # Optionally, re-evaluate the evolved solution with unit tests if provided
158
- if "python" in problem_type.lower() and user_tests_string:
159
- progress(0.9, desc="Re-evaluating Evolved Solution with Tests...")
160
- log_entries.append("\n**Post-Evolution Sanity Check (Re-running Tests on Evolved Code)**")
161
- # Using a neutral LLM config for this, or could be separate
162
- # This evaluation is primarily for the test results, not another LLM critique of the evolved code
163
- evolved_eval_res = evaluate_solution_candidate(
164
- evolved_solution_text, problem_description, problem_type, user_tests_string,
165
- {"type": model_config["type"], "model_id": model_config["id"], "temp": 0.1, "max_tokens": eval_max_tokens} # Low temp for focused test eval
166
  )
167
- evolved_solution_display_md += (
168
- f"\n\n**Post-Evolution Test Results (Simulated):**\n"
169
- f"Passed: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests}\n"
 
 
170
  )
171
- if evolved_eval_res.execution_error:
172
- evolved_solution_display_md += f"Execution Output/Error: {evolved_eval_res.execution_error}\n"
173
- log_entries.append(f" Evolved Code Test Results: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed.")
174
 
175
  # Get LLM to explain the test results of the evolved code
176
- # progress(0.95, desc="Explaining Evolved Code Test Results...")
177
- # explain_prompt = f"The following Python code was generated: \n```python\n{evolved_solution_text}\n```\nIt was tested against these assertions:\n```python\n{user_tests_string}\n```\nThe test outcome was: {evolved_eval_res.passed_tests}/{evolved_eval_res.total_tests} passed. \nExecution/Error details: {evolved_eval_res.execution_error}\n\nProvide a brief analysis of these test results for the given code."
178
- # explain_sys_prompt = get_system_prompt("code_execution_explainer")
179
- # explanation_response = dispatch_llm_call_simplified(explain_prompt, explain_sys_prompt, llm_evolve_config) # Need a simplified dispatcher or use the full one
180
- # final_thoughts_md = f"**AI Analysis of Evolved Code's Test Results:**\n{explanation_response}"
181
-
182
 
183
- log_entries.append("\n**AlgoForge Prime™ Cycle Complete.**")
184
  progress(1.0, desc="Cycle Complete!")
185
- return "\n\n".join(initial_solutions_display_md), best_solution_display_md, evolved_solution_display_md, "\n".join(log_entries), final_thoughts_md
 
 
186
 
 
 
187
 
188
- # --- GRADIO UI (largely similar, but with a new input for user tests) ---
189
  intro_markdown = """
190
- # ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution
191
- This enhanced version demonstrates a more structured approach to AI-assisted algorithm discovery,
192
- featuring basic (simulated) unit testing for Python code.
193
 
194
  **API Keys Required in Space Secrets:**
195
- - `GOOGLE_API_KEY` (Primary): For Google Gemini API models.
196
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
 
197
  """
198
- token_status_md = ""
 
 
199
  if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
200
- token_status_md = "<p style='color:red;'>⚠️ CRITICAL: NEITHER API IS CONFIGURED. APP WILL NOT FUNCTION.</p>"
201
  else:
202
- if GEMINI_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected.</p>"
203
- else: token_status_md += "<p style='color:orange;'>⚠️ GOOGLE_API_KEY missing/failed. Gemini models disabled.</p>"
204
- if HF_API_CONFIGURED: token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected.</p>"
205
- else: token_status_md += "<p style='color:orange;'>⚠️ HF_TOKEN missing/failed. Hugging Face models disabled.</p>"
206
 
207
 
208
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan"), title="AlgoForge Prime™ Modular") as demo:
209
  gr.Markdown(intro_markdown)
210
- gr.HTML(token_status_md)
211
 
212
- if not AVAILABLE_MODELS or DEFAULT_MODEL_KEY == "No Models Available":
213
- gr.Markdown("<h2 style='color:red;'>No models are available. Check API keys and restart.</h2>")
214
  else:
215
  with gr.Row():
216
- with gr.Column(scale=2): # Made input column wider
 
217
  gr.Markdown("## 💡 1. Define the Challenge")
218
- problem_type_dd = gr.Dropdown(
219
- ["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design"],
220
- label="Type of Problem/Algorithm", value="Python Algorithm with Tests"
 
221
  )
222
- problem_desc_tb = gr.Textbox(
223
- lines=4, label="Problem Description / Desired Outcome",
224
- placeholder="e.g., 'Python function `is_palindrome(s: str) -> bool` that checks if a string is a palindrome, ignoring case and non-alphanumeric chars.'"
225
  )
226
- initial_hints_tb = gr.Textbox(
227
- lines=2, label="Initial Thoughts / Constraints (Optional)",
228
- placeholder="e.g., 'Iterative approach preferred.' or 'Handle empty strings.'"
229
  )
230
- # NEW INPUT for User Tests
231
- user_tests_tb = gr.Textbox(
232
- lines=5, label="Python Unit Tests (Optional, one `assert` per line)",
233
- placeholder="assert is_palindrome('Racecar!') == True\nassert is_palindrome('hello') == False\nassert is_palindrome('') == True",
234
- info="For 'Python Algorithm with Tests' type. Ignored otherwise."
235
  )
236
 
237
  gr.Markdown("## ⚙️ 2. Configure The Forge")
238
- model_select_dd = gr.Dropdown(
239
- choices=list(AVAILABLE_MODELS.keys()),
240
- value=DEFAULT_MODEL_KEY if DEFAULT_MODEL_KEY in AVAILABLE_MODELS else (list(AVAILABLE_MODELS.keys())[0] if AVAILABLE_MODELS else None),
241
- label="Select LLM Core Model"
 
242
  )
243
- num_solutions_slider = gr.Slider(1, 3, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)") # Max 3 for faster runs
244
 
245
- with gr.Accordion("Advanced LLM Parameters", open=False):
246
- # ... (temp and max_tokens sliders - same as before) ...
247
  with gr.Row():
248
- gen_temp_slider = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Genesis Temp")
249
- gen_max_tokens_slider = gr.Slider(200, 2048, value=768, step=64, label="Genesis Max Tokens")
250
  with gr.Row():
251
- eval_temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Crucible Temp")
252
- eval_max_tokens_slider = gr.Slider(150, 1024, value=512, step=64, label="Crucible Max Tokens")
253
  with gr.Row():
254
- evolve_temp_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Evolution Temp")
255
- evolve_max_tokens_slider = gr.Slider(200, 2048, value=1024, step=64, label="Evolution Max Tokens")
256
-
257
 
258
- submit_btn = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg")
259
 
260
- with gr.Column(scale=3): # Made output column wider
 
261
  gr.Markdown("## 🔥 3. The Forge's Output")
262
- with gr.Tabs():
263
- with gr.TabItem("📜 Genesis Candidates & Evaluations"):
264
- output_initial_solutions_md = gr.Markdown(label="Generated Solutions & Combined Evaluations")
265
- with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)"):
266
- output_best_solution_md = gr.Markdown(label="Top Pick for Refinement")
267
- with gr.TabItem("🌟 Evolved Artifact (& Test Analysis)"):
268
- output_evolved_solution_md = gr.Markdown(label="Refined Solution from Evolutionary Forge")
269
- # output_final_thoughts_md = gr.Markdown(label="AI Analysis of Evolved Code's Tests") # Optional separate output
270
- with gr.TabItem("🛠️ Interaction Log (Dev View)"):
271
- output_interaction_log_md = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
272
 
273
- outputs_list = [
274
- output_initial_solutions_md, output_best_solution_md,
275
- output_evolved_solution_md, output_interaction_log_md,
276
- gr.Markdown() # Placeholder for final_thoughts_md if you add it as a separate component
277
- ]
278
-
279
- submit_btn.click(
280
- fn=run_algoforge_simulation,
281
  inputs=[
282
- problem_type_dd, problem_desc_tb, initial_hints_tb, user_tests_tb, # Added user_tests_tb
283
- num_solutions_slider, model_select_dd,
284
- gen_temp_slider, gen_max_tokens_slider,
285
- eval_temp_slider, eval_max_tokens_slider,
286
- evolve_temp_slider, evolve_max_tokens_slider
287
  ],
288
- outputs=outputs_list
289
  )
 
290
  gr.Markdown("---")
291
  gr.Markdown(
292
- "**Disclaimer:** Modular demo. (Simulated) unit testing is illustrative. **NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
293
- "Real sandboxing is complex and critical for safety."
294
  )
295
 
296
- # --- Entry Point ---
297
  if __name__ == "__main__":
298
  print("="*80)
299
- print("AlgoForge Prime™ (Modular Version) Starting...")
300
- # ... (startup print messages for API key status - same as before) ...
301
- print(f"UI default model key: {DEFAULT_MODEL_KEY}")
302
- print(f"Available models for UI: {list(AVAILABLE_MODELS.keys())}")
303
  print("="*80)
304
- demo.launch(debug=True, server_name="0.0.0.0")
 
 
1
  # algoforge_prime/app.py
2
  import gradio as gr
3
  import os
4
+ import time # For progress updates
5
 
6
+ # --- Core Logic Imports ---
7
+ # Initialize clients first to ensure API keys are loaded before other modules use them.
8
+ from core.llm_clients import initialize_all_clients, GEMINI_API_CONFIGURED, HF_API_CONFIGURED
9
+ initialize_all_clients() # Call initialization once when the app starts
10
 
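For readers following the diff without the rest of the repository, here is a minimal sketch of the initialization pattern the two lines above appear to rely on. It assumes `core/llm_clients.py` simply derives the two flags from the `GOOGLE_API_KEY` and `HF_TOKEN` environment variables; the actual module may also construct real API client objects.

```python
# core/llm_clients.py -- illustrative sketch only, not the repo's actual module.
import os

# Evaluated at import time, so `from core.llm_clients import GEMINI_API_CONFIGURED, ...`
# in app.py already sees the final values before initialize_all_clients() runs.
GEMINI_API_CONFIGURED = bool(os.getenv("GOOGLE_API_KEY"))
HF_API_CONFIGURED = bool(os.getenv("HF_TOKEN"))

def initialize_all_clients() -> None:
    """One-time startup hook; a real implementation would build the Gemini /
    Hugging Face client objects here and log any configuration problems."""
    print(f"LLM clients initialized: Gemini={GEMINI_API_CONFIGURED}, HF={HF_API_CONFIGURED}")
```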
11
  from core.generation_engine import generate_initial_solutions
12
+ from core.evaluation_engine import evaluate_solution_candidate, EvaluationResult # Class for typed results
13
  from core.evolution_engine import evolve_solution
14
+ from prompts.system_prompts import get_system_prompt # For specific roles like test explainer
15
+ from prompts.prompt_templates import format_code_test_analysis_user_prompt
16
 
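The orchestrator below reads several attributes off `EvaluationResult` (`score`, `critique_text`, `passed_tests`, `total_tests`, `execution_summary`). As a reading aid, a minimal sketch of the shape this file appears to assume; the real class in `core/evaluation_engine.py` may carry additional state.

```python
# Illustrative sketch of the evaluation result type, inferred from usage in app.py.
from dataclasses import dataclass

@dataclass
class EvaluationResult:
    score: int = 0               # combined critique + test score on a 0-10 scale
    critique_text: str = ""      # full LLM critique of the candidate solution
    passed_tests: int = 0        # user-provided asserts that passed
    total_tests: int = 0         # user-provided asserts that were executed
    execution_summary: str = ""  # human-readable summary of the (simulated) test run
```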
17
+ # --- Application Configuration (Models, Defaults) ---
18
+ AVAILABLE_MODELS_CONFIG = {}
19
+ UI_DEFAULT_MODEL_KEY = None
20
 
21
+ # Populate with Gemini models if API is configured
22
  if GEMINI_API_CONFIGURED:
23
+ AVAILABLE_MODELS_CONFIG.update({
24
  "Google Gemini 1.5 Flash (API - Fast, Recommended)": {"id": "gemini-1.5-flash-latest", "type": "google_gemini"},
25
  "Google Gemini 1.0 Pro (API)": {"id": "gemini-1.0-pro-latest", "type": "google_gemini"},
26
  })
27
+ UI_DEFAULT_MODEL_KEY = "Google Gemini 1.5 Flash (API - Fast, Recommended)"
28
 
29
+ # Populate with Hugging Face models if API is configured
30
  if HF_API_CONFIGURED:
31
+ AVAILABLE_MODELS_CONFIG.update({
32
  "Google Gemma 2B (HF - Quick Test)": {"id": "google/gemma-2b-it", "type": "hf"},
33
  "Mistral 7B Instruct (HF)": {"id": "mistralai/Mistral-7B-Instruct-v0.2", "type": "hf"},
34
+ "CodeLlama 7B Instruct (HF)": {"id": "codellama/CodeLlama-7b-Instruct-hf", "type": "hf"}, # Smaller CodeLlama
35
  })
36
+ if not UI_DEFAULT_MODEL_KEY: # If Gemini isn't configured, default to an HF model
37
+ UI_DEFAULT_MODEL_KEY = "Google Gemma 2B (HF - Quick Test)"
38
+
39
+ # Absolute fallback if no models could be configured
40
+ if not AVAILABLE_MODELS_CONFIG:
41
+ print("CRITICAL APP ERROR: No models could be configured. Check API keys in Space Secrets.")
42
+ AVAILABLE_MODELS_CONFIG["No Models Available (Check API Keys)"] = {"id": "dummy", "type": "none"}
43
+ UI_DEFAULT_MODEL_KEY = "No Models Available (Check API Keys)"
44
+ elif not UI_DEFAULT_MODEL_KEY and AVAILABLE_MODELS_CONFIG:
45
+ UI_DEFAULT_MODEL_KEY = list(AVAILABLE_MODELS_CONFIG.keys())[0] # Pick first available if default somehow not set
46
+
47
+ # --- Main Orchestration Logic for Gradio ---
48
+ def run_algoforge_simulation_orchestrator(
49
+ problem_type_selected: str,
50
+ problem_description_text: str,
51
+ initial_hints_text: str,
52
+ user_provided_tests_code: str,
53
+ num_initial_solutions_to_gen: int,
54
+ selected_model_ui_key: str,
55
+ genesis_temp: float, genesis_max_tokens: int,
56
+ critique_temp: float, critique_max_tokens: int,
57
+ evolution_temp: float, evolution_max_tokens: int,
58
+ progress=gr.Progress(track_tqdm=True) # Gradio progress tracker
59
  ):
60
  progress(0, desc="Initializing AlgoForge Prime™...")
61
+ log_entries = [f"**AlgoForge Prime™ Cycle Starting at {time.strftime('%Y-%m-%d %H:%M:%S')}**"]
62
+ start_time = time.time()
63
+
64
+ # Basic input validation
65
+ if not problem_description_text.strip():
66
+ error_msg = "CRITICAL INPUT ERROR: Problem Description is mandatory. Please describe the problem."
67
+ log_entries.append(error_msg)
68
+ return error_msg, "", "", "\n".join(log_entries), "" # Return 5 values for outputs
69
+
70
+ current_model_config = AVAILABLE_MODELS_CONFIG.get(selected_model_ui_key)
71
+ if not current_model_config or current_model_config["type"] == "none":
72
+ error_msg = f"CRITICAL CONFIG ERROR: No valid LLM selected ('{selected_model_ui_key}'). This usually means API keys are missing or failed to initialize. Check Space Secrets and restart."
73
+ log_entries.append(error_msg)
74
+ return error_msg, "", "", "\n".join(log_entries), ""
75
+
76
+ log_entries.append(f"Selected Model: {selected_model_ui_key} (Type: {current_model_config['type']}, ID: {current_model_config['id']})")
77
+ log_entries.append(f"Problem Type: {problem_type_selected}")
78
+ log_entries.append(f"User Unit Tests Provided: {'Yes' if user_provided_tests_code.strip() else 'No'}")
79
+
80
+ # Prepare LLM configurations for each stage
81
+ llm_config_genesis = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": genesis_temp, "max_tokens": genesis_max_tokens}
82
+ llm_config_critique = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": critique_temp, "max_tokens": critique_max_tokens}
83
+ llm_config_evolution = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": evolution_temp, "max_tokens": evolution_max_tokens}
84
 
85
  # --- STAGE 1: GENESIS ---
86
+ progress(0.1, desc="Stage 1: Genesis Engine - Generating Initial Solutions...")
87
+ log_entries.append("\n**------ STAGE 1: GENESIS ENGINE ------**")
88
 
89
+ initial_raw_solutions = generate_initial_solutions(
90
+ problem_description_text, initial_hints_text, problem_type_selected,
91
+ num_initial_solutions_to_gen, llm_config_genesis
 
92
  )
93
+ log_entries.append(f"Genesis Engine produced {len(initial_raw_solutions)} raw solution candidate(s).")
94
+ for i, sol_text in enumerate(initial_raw_solutions):
95
+ log_entries.append(f" Candidate {i+1} (Raw Snippet): {str(sol_text)[:120]}...")
 
96
 
97
+ # --- STAGE 2: CRITIQUE & AUTOMATED EVALUATION ---
98
  progress(0.3, desc="Stage 2: Critique Crucible - Evaluating Candidates...")
99
+ log_entries.append("\n**------ STAGE 2: CRITIQUE CRUCIBLE & AUTOMATED EVALUATION ------**")
100
 
101
+ evaluated_candidates_list = [] # Stores dicts: {"id": ..., "solution_text": ..., "evaluation_result": EvaluationResult}
102
+
103
+ for i, candidate_solution_text in enumerate(initial_raw_solutions):
104
+ current_progress = 0.3 + ( (i + 1) / num_initial_solutions_to_gen ) * 0.35 # Progress for evaluation stage
105
+ progress(current_progress, desc=f"Evaluating Candidate {i+1} of {num_initial_solutions_to_gen}...")
106
+ log_entries.append(f"\n--- Evaluating Candidate {i+1} ---")
107
 
108
+ # The evaluation_engine handles if candidate_solution_text itself is an error string
109
+ evaluation_obj = evaluate_solution_candidate( # type: EvaluationResult
110
+ candidate_solution_text, problem_description_text, problem_type_selected,
111
+ user_provided_tests_code, llm_config_critique
112
+ )
113
+
114
+ log_entries.append(f" Final Combined Score: {evaluation_obj.score}/10")
115
+ log_entries.append(f" Automated Tests: {evaluation_obj.passed_tests}/{evaluation_obj.total_tests} passed.")
116
+ if evaluation_obj.execution_summary: log_entries.append(f" Execution Summary: {evaluation_obj.execution_summary}")
117
+ log_entries.append(f" LLM Critique (Snippet): {str(evaluation_obj.critique_text)[:150]}...")
118
+
119
+ evaluated_candidates_list.append({
120
  "id": i + 1,
121
+ "solution_text": candidate_solution_text, # Store original text, even if it was an error from genesis
122
+ "evaluation_result": evaluation_obj
123
  })
124
 
125
+ # Format display for initial solutions & evaluations
126
+ initial_solutions_display_markdown = []
127
+ for data in evaluated_candidates_list:
128
+ initial_solutions_display_markdown.append(
129
+ f"**Candidate {data['id']}:**\n"
130
+ f"```python\n{data['solution_text']}\n```\n\n" # Assuming python for display, adjust if problem_type varies widely
131
+ f"**Evaluation Verdict (Combined Score: {data['evaluation_result'].score}/10):**\n"
132
+ f"{data['evaluation_result'].critique_text}\n---"
133
  )
134
 
135
+ # --- STAGE 3: SELECTION OF CHAMPION ---
136
+ progress(0.7, desc="Stage 3: Selecting Champion Candidate...")
137
+ log_entries.append("\n**------ STAGE 3: CHAMPION SELECTION ------**")
138
+
139
+ # Filter out candidates that were errors from genesis OR had very low evaluation scores (e.g., score of 0 from evaluation)
140
+ # We want to select a champion that is actually a piece of code/algorithm, not an error message.
141
+ potentially_viable_candidates = [
142
+ cand for cand in evaluated_candidates_list
143
+ if cand["evaluation_result"] and cand["evaluation_result"].score > 0 and \
144
+ cand["solution_text"] and not cand["solution_text"].startswith("ERROR")
145
+ ]
146
+
147
+ if not potentially_viable_candidates:
148
+ final_error_msg = "No viable candidate solutions found after generation and evaluation. All attempts may have failed or scored too low."
149
+ log_entries.append(f" CRITICAL: {final_error_msg}")
150
+ return "\n\n".join(initial_solutions_display_markdown), final_error_msg, "", "\n".join(log_entries), ""
151
+
152
+ potentially_viable_candidates.sort(key=lambda x: x["evaluation_result"].score, reverse=True)
153
+ champion_candidate_data = potentially_viable_candidates[0]
154
+
155
+ log_entries.append(f"Champion Selected: Candidate {champion_candidate_data['id']} "
156
+ f"(Solution Snippet: {champion_candidate_data['solution_text'][:60]}...) "
157
+ f"with evaluation score {champion_candidate_data['evaluation_result'].score}/10.")
158
+
159
+ champion_display_markdown = (
160
+ f"**Champion Candidate ID: {champion_candidate_data['id']} "
161
+ f"(Original Combined Score: {champion_candidate_data['evaluation_result'].score}/10):**\n"
162
+ f"```python\n{champion_candidate_data['solution_text']}\n```\n\n"
163
+ f"**Original Comprehensive Evaluation for this Champion:**\n"
164
+ f"{champion_candidate_data['evaluation_result'].critique_text}"
165
  )
166
 
167
+ # --- STAGE 4: EVOLUTIONARY FORGE ---
168
+ progress(0.75, desc="Stage 4: Evolutionary Forge - Refining Champion...")
169
+ log_entries.append("\n**------ STAGE 4: EVOLUTIONARY FORGE ------**")
 
170
 
171
+ evolved_solution_code = evolve_solution(
172
+ champion_candidate_data["solution_text"],
173
+ champion_candidate_data["evaluation_result"].critique_text, # Pass the full critique
174
+ champion_candidate_data["evaluation_result"].score,
175
+ problem_description_text,
176
+ problem_type_selected,
177
+ llm_config_evolution
178
  )
179
+ log_entries.append(f"Raw Evolved Solution Text (Snippet): {str(evolved_solution_code)[:150]}...")
180
 
181
+ evolved_solution_display_markdown = ""
182
+ ai_test_analysis_markdown = "" # For LLM explanation of unit test results of evolved code
183
 
184
+ if evolved_solution_code.startswith("ERROR"):
185
+ evolved_solution_display_markdown = f"**Evolution Stage Failed:**\n{evolved_solution_code}"
186
  else:
187
+ evolved_solution_display_markdown = f"**✨ AlgoForge Prime™ Evolved Artifact ✨:**\n```python\n{evolved_solution_code}\n```"
188
+
189
+ # Optionally, re-evaluate the evolved solution with unit tests if provided and applicable
190
+ if "python" in problem_type_selected.lower() and user_provided_tests_code.strip():
191
+ progress(0.9, desc="Post-Evolution: Re-running Automated Tests on Evolved Code...")
192
+ log_entries.append("\n--- Post-Evolution Sanity Check (Automated Tests on Evolved Code) ---")
193
+
194
+ # Use a low temperature for this critique to focus on test results rather than creative critique
195
+ # The critique part here is mostly for consistency, primary goal is test execution.
196
+ evolved_critique_config = {"type": current_model_config["type"], "model_id": current_model_config["id"], "temp": 0.2, "max_tokens": critique_max_tokens}
197
+
198
+ evolved_code_eval_result = evaluate_solution_candidate( # type: EvaluationResult
199
+ evolved_solution_code, problem_description_text, problem_type_selected,
200
+ user_provided_tests_code, evolved_critique_config
201
  )
202
+
203
+ evolved_solution_display_markdown += (
204
+ f"\n\n**Post-Evolution Automated Test Results (Simulated):**\n"
205
+ f"{evolved_code_eval_result.execution_summary}\n"
206
+ f"Passed: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests}\n"
207
  )
208
+ log_entries.append(f" Evolved Code Test Results: {evolved_code_eval_result.passed_tests}/{evolved_code_eval_result.total_tests} passed. "
209
+ f"Summary: {evolved_code_eval_result.execution_summary}")
 
210
 
211
  # Get LLM to explain the test results of the evolved code
212
+ if evolved_code_eval_result.total_tests > 0: # Only if tests were run
213
+ progress(0.95, desc="Post-Evolution: AI Analyzing Test Results...")
214
+ log_entries.append("\n--- AI Analysis of Evolved Code's Test Results ---")
215
+ analysis_user_prompt = format_code_test_analysis_user_prompt(
216
+ evolved_solution_code,
217
+ user_provided_tests_code,
218
+ evolved_code_eval_result.execution_summary # Pass the summary string
219
+ )
220
+ analysis_system_prompt = get_system_prompt("code_execution_explainer")
221
+
222
+ # Use a config for analysis - can be same as critique or specialized
223
+ llm_analysis_config = {"type": current_model_config["type"], "model_id": current_model_config["id"],
224
+ "temp": 0.3, "max_tokens": critique_max_tokens + 100} # A bit more tokens for explanation
225
 
226
+ from core.llm_clients import call_huggingface_api, call_gemini_api # Re-import for clarity or use a dispatcher
227
+
228
+ explanation_response_obj = None
229
+ if llm_analysis_config["type"] == "hf":
230
+ explanation_response_obj = call_huggingface_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
231
+ elif llm_analysis_config["type"] == "google_gemini":
232
+ explanation_response_obj = call_gemini_api(analysis_user_prompt, llm_analysis_config["model_id"], llm_analysis_config["temp"], llm_analysis_config["max_tokens"], analysis_system_prompt)
233
+
234
+ if explanation_response_obj and explanation_response_obj.success:
235
+ ai_test_analysis_markdown = f"**AI Analysis of Evolved Code's Test Performance:**\n{explanation_response_obj.text}"
236
+ log_entries.append(f" AI Test Analysis (Snippet): {explanation_response_obj.text[:100]}...")
237
+ elif explanation_response_obj:
238
+ ai_test_analysis_markdown = f"**AI Analysis of Test Performance Failed:**\n{explanation_response_obj.error}"
239
+ log_entries.append(f" AI Test Analysis Error: {explanation_response_obj.error}")
240
+
241
+
242
+ total_time = time.time() - start_time
243
+ log_entries.append(f"\n**AlgoForge Prime™ Cycle Complete. Total time: {total_time:.2f} seconds.**")
244
  progress(1.0, desc="Cycle Complete!")
245
+
246
+ return "\n\n".join(initial_solutions_display_markdown), champion_display_markdown, evolved_solution_display_markdown, "\n".join(log_entries), ai_test_analysis_markdown
247
+
248
 
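The in-function re-import above ("Re-import for clarity or use a dispatcher") hints at an obvious refactor. A possible shape for such a helper, assuming the `call_gemini_api` / `call_huggingface_api` call signature used in this file (prompt, model id, temperature, max tokens, system prompt); this is a sketch, not part of the committed code:

```python
# Hypothetical dispatcher, not in this commit: routes a request to the backend
# named in one of the llm_config dicts built by the orchestrator above.
from core.llm_clients import call_gemini_api, call_huggingface_api

def dispatch_llm_call(user_prompt: str, system_prompt: str, llm_config: dict):
    """Return the provider's response object, or None for an unknown backend type."""
    if llm_config["type"] == "google_gemini":
        return call_gemini_api(user_prompt, llm_config["model_id"],
                               llm_config["temp"], llm_config["max_tokens"], system_prompt)
    if llm_config["type"] == "hf":
        return call_huggingface_api(user_prompt, llm_config["model_id"],
                                    llm_config["temp"], llm_config["max_tokens"], system_prompt)
    return None
```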
249
+ # --- Gradio UI Definition ---
250
+ # (This section is largely similar to the previous app.py, with updated input/output connections)
251
 
 
252
  intro_markdown = """
253
+ # ✨ AlgoForge Prime™ ✨: Modular Algorithmic Evolution (v2)
254
+ This enhanced version uses a modular codebase and demonstrates a conceptual workflow for AI-assisted algorithm discovery,
255
+ featuring (simulated) unit testing for Python code if provided.
256
 
257
  **API Keys Required in Space Secrets:**
258
+ - `GOOGLE_API_KEY` (Primary): For Google Gemini API models. Ensure the "Generative Language API" (or similar) is enabled for your project.
259
  - `HF_TOKEN` (Secondary): For Hugging Face hosted models.
260
+ If keys are missing or invalid, corresponding models will be unavailable.
261
  """
262
+
263
+ # Determine API status for UI message
264
+ ui_token_status_md = ""
265
  if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
266
+ ui_token_status_md = "<p style='color:red;'>⚠️ **CRITICAL: NEITHER GOOGLE_API_KEY NOR HF_TOKEN are configured or working correctly.** The application will not be able to call any LLMs.</p>"
267
  else:
268
+ if GEMINI_API_CONFIGURED:
269
+ ui_token_status_md += "<p style='color:green;'>✅ Google Gemini API Key detected and configured.</p>"
270
+ else:
271
+ ui_token_status_md += "<p style='color:orange;'>⚠️ **GOOGLE_API_KEY missing or failed to configure.** Gemini API models will be disabled.</p>"
272
+
273
+ if HF_API_CONFIGURED:
274
+ ui_token_status_md += "<p style='color:green;'>✅ Hugging Face API Token detected and client initialized.</p>"
275
+ else:
276
+ ui_token_status_md += "<p style='color:orange;'>⚠️ **HF_TOKEN missing or client failed to initialize.** Hugging Face models will be disabled.</p>"
277
 
278
 
279
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"), title="AlgoForge Prime™ Modular v2") as app_demo:
280
  gr.Markdown(intro_markdown)
281
+ gr.HTML(ui_token_status_md)
282
 
283
+ if not AVAILABLE_MODELS_CONFIG or UI_DEFAULT_MODEL_KEY == "No Models Available (Check API Keys)":
284
+ gr.Markdown("<h2 style='color:red;'>No LLM models are available. Please check your API key configurations in this Space's Secrets and restart the Space. The application cannot function without at least one working API configuration.</h2>")
285
  else:
286
  with gr.Row():
287
+ # Input Column
288
+ with gr.Column(scale=2): # Input column slightly wider
289
  gr.Markdown("## 💡 1. Define the Challenge")
290
+ problem_type_dropdown = gr.Dropdown(
291
+ choices=["Python Algorithm with Tests", "Python Algorithm (Critique Only)", "General Algorithm Idea", "Conceptual System Design", "Pseudocode Refinement"],
292
+ label="Type of Problem / Algorithm", value="Python Algorithm with Tests",
293
+ info="Select '...with Tests' to enable (simulated) unit testing if you provide tests below."
294
  )
295
+ problem_description_textbox = gr.Textbox(
296
+ lines=5, label="Problem Description / Desired Outcome",
297
+ placeholder="Example for 'Python Algorithm with Tests':\n`def calculate_factorial(n: int) -> int:`\nCalculates factorial of n. Should handle n=0 (returns 1) and raise ValueError for n<0."
298
  )
299
+ initial_hints_textbox = gr.Textbox(
300
+ lines=3, label="Initial Thoughts / Constraints / Seed Ideas (Optional)",
301
+ placeholder="E.g., 'Prefer an iterative solution over recursive for factorial.' or 'Consider time complexity.'"
302
  )
303
+ user_tests_textbox = gr.Textbox(
304
+ lines=6, label="Python Unit Tests (Optional, one `assert` per line)",
305
+ placeholder="assert calculate_factorial(0) == 1\nassert calculate_factorial(5) == 120\n# try: calculate_factorial(-1); assert False # Expected ValueError\n# except ValueError: assert True",
306
+ info="For 'Python Algorithm with Tests'. Ensure function names match your problem description."
 
307
  )
308
 
309
  gr.Markdown("## ⚙️ 2. Configure The Forge")
310
+ model_selection_dropdown = gr.Dropdown(
311
+ choices=list(AVAILABLE_MODELS_CONFIG.keys()),
312
+ value=UI_DEFAULT_MODEL_KEY if UI_DEFAULT_MODEL_KEY in AVAILABLE_MODELS_CONFIG else (list(AVAILABLE_MODELS_CONFIG.keys())[0] if AVAILABLE_MODELS_CONFIG else None),
313
+ label="Select LLM Core Model",
314
+ info="Ensure the corresponding API key (Google or HF) is configured in secrets."
315
  )
316
+ num_initial_solutions_slider = gr.Slider(minimum=1, maximum=4, value=2, step=1, label="Number of Initial Solutions (Genesis Engine)")
317
 
318
+ with gr.Accordion("Advanced LLM Parameters (Expert Users)", open=False):
 
319
  with gr.Row():
320
+ genesis_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="Genesis Temp", info="Higher = more creative, Lower = more deterministic.") # Gemini range often 0-1
321
+ genesis_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=768, step=64, label="Genesis Max Output Tokens")
322
  with gr.Row():
323
+ critique_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.05, label="Critique Temp")
324
+ critique_max_tokens_slider = gr.Slider(minimum=150, maximum=1024, value=512, step=64, label="Critique Max Output Tokens")
325
  with gr.Row():
326
+ evolution_temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.75, step=0.05, label="Evolution Temp")
327
+ evolution_max_tokens_slider = gr.Slider(minimum=200, maximum=2048, value=1024, step=64, label="Evolution Max Output Tokens")
 
328
 
329
+ engage_button = gr.Button("🚀 ENGAGE ALGOFORGE PRIME™ 🚀", variant="primary", size="lg", elem_id="engage_button_elem")
330
 
331
+ # Output Column
332
+ with gr.Column(scale=3): # Output column wider
333
  gr.Markdown("## 🔥 3. The Forge's Output")
334
+ with gr.Tabs(elem_id="output_tabs_elem"):
335
+ with gr.TabItem("📜 Initial Candidates & Evaluations", id="tab_initial_evals"):
336
+ output_initial_solutions_markdown = gr.Markdown(label="Generated Solutions & Combined Evaluations")
337
+ with gr.TabItem("🏆 Champion Candidate (Pre-Evolution)", id="tab_champion"):
338
+ output_champion_markdown = gr.Markdown(label="Top Pick for Refinement")
339
+ with gr.TabItem("🌟 Evolved Artifact & Test Analysis", id="tab_evolved"):
340
+ output_evolved_markdown = gr.Markdown(label="Refined Solution from Evolutionary Forge")
341
+ output_ai_test_analysis_markdown = gr.Markdown(label="AI Analysis of Evolved Code's Test Performance")
342
+ with gr.TabItem("🛠️ Interaction Log (Developer View)", id="tab_log"):
343
+ output_interaction_log_markdown = gr.Markdown(label="Detailed Log of LLM Prompts & Responses")
344
 
345
+ # Connect button to the orchestration function
346
+ engage_button.click(
347
+ fn=run_algoforge_simulation_orchestrator,
348
  inputs=[
349
+ problem_type_dropdown, problem_description_textbox, initial_hints_textbox, user_tests_textbox,
350
+ num_initial_solutions_slider, model_selection_dropdown,
351
+ genesis_temp_slider, genesis_max_tokens_slider,
352
+ critique_temp_slider, critique_max_tokens_slider,
353
+ evolution_temp_slider, evolution_max_tokens_slider
354
  ],
355
+ outputs=[
356
+ output_initial_solutions_markdown, output_champion_markdown,
357
+ output_evolved_markdown, output_interaction_log_markdown,
358
+ output_ai_test_analysis_markdown
359
+ ]
360
  )
361
+
362
  gr.Markdown("---")
363
  gr.Markdown(
364
+ "**Disclaimer:** This is a conceptual, educational demonstration. "
365
+ "The (simulated) unit testing feature is for illustrative purposes. "
366
+ "**NEVER run LLM-generated code from an untrusted source in an unrestricted environment.** "
367
+ "Implementing robust and secure code sandboxing is complex and absolutely critical for safety in real-world applications. "
368
+ "LLM outputs always require careful human review and verification."
369
  )
370
 
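To make the warning above concrete, here is a deliberately simple sketch of how user-provided `assert` lines could be executed in a separate process with a timeout. This is not the project's `evaluation_engine`, and a subprocess plus a timeout is still not real sandboxing (no filesystem, network, or memory isolation); treat it as an illustration of the minimum precautions, not a safe design.

```python
# Illustration only -- NOT safe sandboxing. Real isolation needs containers,
# seccomp/gVisor-style confinement, or a dedicated execution service.
import subprocess
import sys
import tempfile

def run_user_asserts(candidate_code: str, user_tests_code: str, timeout_s: int = 5) -> str:
    """Run candidate code plus the user's assert lines in a child Python process."""
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(candidate_code + "\n\n" + user_tests_code + "\nprint('ALL ASSERTS PASSED')\n")
        script_path = f.name
    try:
        proc = subprocess.run(
            [sys.executable, script_path],
            capture_output=True, text=True, timeout=timeout_s,
        )
        return (proc.stdout + proc.stderr).strip()
    except subprocess.TimeoutExpired:
        return f"TIMEOUT: execution exceeded {timeout_s} seconds"
```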
371
+ # --- Entry Point for Running the Gradio App ---
372
  if __name__ == "__main__":
373
  print("="*80)
374
+ print("AlgoForge Prime™ (Modular Version v2 with Simulated Testing) - Launching...")
375
+
376
+ print(f" Google Gemini API Configured: {GEMINI_API_CONFIGURED}")
377
+ print(f" Hugging Face API Configured: {HF_API_CONFIGURED}")
378
+
379
+ if not GEMINI_API_CONFIGURED and not HF_API_CONFIGURED:
380
+ print(" CRITICAL WARNING: No API keys seem to be configured. The application will likely be non-functional.")
381
+
382
+ print(f" UI Default Model Key: {UI_DEFAULT_MODEL_KEY}")
383
+ print(f" Available models for UI: {list(AVAILABLE_MODELS_CONFIG.keys())}")
384
  print("="*80)
385
+
386
+ app_demo.launch(debug=True, server_name="0.0.0.0") # server_name="0.0.0.0" is often needed for Docker/Spaces