Update core/evolution_engine.py
core/evolution_engine.py  (+67 -106)
CHANGED
@@ -1,119 +1,80 @@
-# algoforge_prime/core/
-#
-from
-from prompts.system_prompts import get_system_prompt #
-from safe_executor import execute_python_code_with_tests, ExecutionResult # Assuming it's in the same 'core' package

-        self.combined_score = combined_score
-        self.llm_critique_text = llm_critique_text # LLM's qualitative assessment
-        self.execution_details = execution_details # Object from safe_executor
-        self.raw_llm_response = raw_llm_response

-        full_critique += f"\n\n**Automated Execution & Test Results (Simulated):**\n"
-        if self.execution_details.total_tests > 0:
-            full_critique += f" Tests Attempted: {self.execution_details.total_tests}\n"
-            full_critique += f" Tests Passed: {self.execution_details.passed_tests}\n"
-        if self.execution_details.error:
-            full_critique += f" Execution Error: {self.execution_details.error}\n"
-        elif self.execution_details.output:
-            full_critique += f" Execution Output (stdout):\n```\n{self.execution_details.output[:500]}\n```\n" # Limit output display
-        full_critique += f" Execution Time: {self.execution_details.execution_time:.4f}s\n"
-        return full_critique

-def
-    try:
-        import re
-        match = re.search(r"Score:\s*(\d+)(?:\s*/\s*10)?", llm_text_output, re.IGNORECASE)
-        if match:
-            parsed_score_val = int(match.group(1))
-            score = max(1, min(parsed_score_val, 10))
-        else: score = random.randint(3, 6) # Fallback if no score marker
-    except Exception: score = random.randint(3, 5) # Fallback on any parsing error
-    return score

-def evaluate_solution_candidate(
-    solution_text: str,
     problem_description: str,
     problem_type: str,

-    if llm_client_config["type"] == "hf":
-        llm_response_obj = call_huggingface_api(user_p_critique, llm_client_config["model_id"], llm_client_config["temp"], llm_client_config["max_tokens"], system_p_critique)
-    elif llm_client_config["type"] == "google_gemini":
-        llm_response_obj = call_gemini_api(user_p_critique, llm_client_config["model_id"], llm_client_config["temp"], llm_client_config["max_tokens"], system_p_critique)

-    if llm_response_obj:
-        raw_llm_critique_resp = llm_response_obj.raw_response
-        if llm_response_obj.success:
-            llm_critique_text = llm_response_obj.text
-            llm_score = _parse_llm_score(llm_critique_text)
-        else:
-            llm_critique_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
-            llm_score = 0 # Penalize
-    elif solution_text and solution_text.startswith("ERROR"):
-        llm_critique_text = f"Solution was an error from Genesis: {solution_text}"
-        llm_score = 0

-    if "
         )

-    else:
-        pass_ratio = execution_result_obj.passed_tests / execution_result_obj.total_tests
-        if pass_ratio == 1.0: # All tests passed
-            combined_score = min(10, llm_score + 2) # Significant bonus
-        elif pass_ratio >= 0.75: # Most tests passed
-            combined_score = min(10, llm_score + 1) # Small bonus
-        elif pass_ratio < 0.25: # Very few tests passed
-            combined_score = max(1, llm_score - 4)
-        else: # Some tests passed
-            combined_score = int(llm_score * (0.5 + 0.5 * pass_ratio)) # Weighted average

-        combined_score = max(1, min(10, combined_score)) # Clamp 1-10

-        combined_score=combined_score,
-        llm_critique_text=llm_critique_text,
-        execution_details=execution_result_obj,
-        raw_llm_response=raw_llm_critique_resp
-    )
+# algoforge_prime/core/evolution_engine.py
+print("DEBUG: Importing core.evolution_engine")

+# --- Corrected Imports ---
+# Absolute imports for modules outside the 'core' package
+from prompts.system_prompts import get_system_prompt

+# Absolute imports for other modules within the 'core' package (or relative for siblings)
+from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse

+# Relative import for a sibling module within the 'core' package
+# from .safe_executor import ExecutionResult  # Not directly used in this module, but evaluation_output_obj might contain it
+# from .evaluation_engine import EvaluationResultOutput  # For type hinting the parameter

+print("DEBUG: core.evolution_engine - Imports successful")

+def evolve_solution(
+    original_solution_text: str,
+    evaluation_output_obj,   # This object comes from evaluation_engine and contains EvaluationResultOutput
+                             # It will have a .get_display_critique() method and .combined_score attribute
     problem_description: str,
     problem_type: str,
+    llm_client_config: dict  # {"type": ..., "model_id": ..., "temp": ..., "max_tokens": ...}
+) -> str:  # Returns evolved solution text or an error string
+    """
+    Attempts to evolve a solution based on its comprehensive evaluation details.
+    """
+    print(f"DEBUG: evolution_engine.py - Evolving solution. Problem type: {problem_type}")
+    system_p_evolve = get_system_prompt("evolution_general")  # problem_type can be used for specialization here too

+    # Extract necessary info from the evaluation_output_obj.
+    # This assumes evaluation_output_obj is an instance of EvaluationResultOutput from evaluation_engine.py
+    # or at least has these attributes/methods.
+    try:
+        critique_and_test_feedback = evaluation_output_obj.get_display_critique()
+        original_score = evaluation_output_obj.combined_score
+    except AttributeError as e:
+        print(f"ERROR: evolution_engine.py - evaluation_output_obj is missing expected attributes/methods: {e}")
+        # Fallback if the object structure is not as expected
+        critique_and_test_feedback = "Critique data was not in the expected format."
+        original_score = 0  # Assign a neutral score if real one can't be found

+    user_p_evolve = (
+        f"Original Problem Context: \"{problem_description}\"\n\n"
+        f"The solution to be evolved achieved a combined score of {original_score}/10.\n"
+        f"Here is the original solution text:\n```python\n{original_solution_text}\n```\n\n"
+        f"Here is the comprehensive evaluation it received (including LLM critique and automated test feedback if run):\n'''\n{critique_and_test_feedback}\n'''\n\n"
+        f"Your Task: Based on ALL the information above (solution, LLM critique, and crucially any test execution results/errors mentioned in the evaluation), "
+        f"evolve the provided solution to make it demonstrably superior. "
+        f"Prioritize fixing any reported execution errors or failed tests. "
+        f"Then, address other critique points like efficiency, clarity, or completeness. "
+        f"Output the *complete evolved solution*. "
+        f"Follow this with a brief explanation of the key changes and improvements you implemented, especially how you addressed test failures or execution issues."
+    )

+    llm_response_obj = None  # type: LLMResponse
+    if llm_client_config["type"] == "hf":
+        llm_response_obj = call_huggingface_api(
+            user_p_evolve, llm_client_config["model_id"],
+            temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+            system_prompt_text=system_p_evolve
         )
+    elif llm_client_config["type"] == "google_gemini":
+        llm_response_obj = call_gemini_api(
+            user_p_evolve, llm_client_config["model_id"],
+            temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+            system_prompt_text=system_p_evolve
+        )
+    else:
+        error_msg = f"ERROR (Evolution): Unknown LLM client type '{llm_client_config['type']}'"
+        print(f"ERROR: evolution_engine.py - {error_msg}")
+        return error_msg

+    if llm_response_obj.success:
+        return llm_response_obj.text
+    else:
+        # Error is already logged by call_..._api functions if it's from there
+        return f"ERROR (Evolution with {llm_response_obj.model_id_used}): {llm_response_obj.error}"

+print("DEBUG: core.evolution_engine - Module fully defined.")
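For reference, a minimal sketch of how the new evolve_solution might be invoked by a caller, assuming an EvaluationResultOutput-like object from core.evaluation_engine as described in the comments above. Only the config keys ("type", "model_id", "temp", "max_tokens") and the client types "hf" / "google_gemini" come from this commit; the stub class, the model id, temperature, token limit, and problem_type string are illustrative placeholders, not part of the repository.

# Hypothetical caller sketch; values marked "placeholder" are assumptions, not from the diff.
from core.evolution_engine import evolve_solution

class _StubEvaluation:
    # Stands in for EvaluationResultOutput: exposes the two members evolve_solution reads.
    combined_score = 6
    def get_display_critique(self):
        return "LLM critique text plus automated test feedback would go here."

llm_client_config = {
    "type": "google_gemini",        # or "hf"
    "model_id": "gemini-model-id",  # placeholder
    "temp": 0.7,                    # placeholder
    "max_tokens": 1024,             # placeholder
}

evolved = evolve_solution(
    original_solution_text="def add(a, b):\n    return a - b  # deliberately buggy",
    evaluation_output_obj=_StubEvaluation(),
    problem_description="Write a function that adds two integers.",
    problem_type="python_function",  # placeholder
    llm_client_config=llm_client_config,
)
if evolved.startswith("ERROR"):
    print(evolved)  # evolve_solution returns an error string instead of raising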