mgbam committed on
Commit dd9dfb2 · verified · 1 Parent(s): cbf1fef

Update core/evolution_engine.py

Files changed (1)
  1. core/evolution_engine.py +67 -106
core/evolution_engine.py CHANGED
@@ -1,119 +1,80 @@
- # algoforge_prime/core/evaluation_engine.py
- import random
-
- from prompts.prompt_templates import format_critique_user_prompt
- # Import our (simulated) safe executor
- from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse # Absolute
- from prompts.system_prompts import get_system_prompt #
- from safe_executor import execute_python_code_with_tests, ExecutionResult # Assuming it's in the same 'core' package
-
- class EvaluationResultOutput: # Renamed to avoid conflict with safe_executor.ExecutionResult
-     def __init__(self, combined_score=0, llm_critique_text="", execution_details: ExecutionResult = None, raw_llm_response=None):
-         self.combined_score = combined_score
-         self.llm_critique_text = llm_critique_text # LLM's qualitative assessment
-         self.execution_details = execution_details # Object from safe_executor
-         self.raw_llm_response = raw_llm_response
-
-     def get_display_critique(self):
-         full_critique = self.llm_critique_text if self.llm_critique_text else "LLM critique was not performed or failed."
-         if self.execution_details:
-             full_critique += f"\n\n**Automated Execution & Test Results (Simulated):**\n"
-             if self.execution_details.total_tests > 0:
-                 full_critique += f" Tests Attempted: {self.execution_details.total_tests}\n"
-                 full_critique += f" Tests Passed: {self.execution_details.passed_tests}\n"
-             if self.execution_details.error:
-                 full_critique += f" Execution Error: {self.execution_details.error}\n"
-             elif self.execution_details.output:
-                 full_critique += f" Execution Output (stdout):\n```\n{self.execution_details.output[:500]}\n```\n" # Limit output display
-             full_critique += f" Execution Time: {self.execution_details.execution_time:.4f}s\n"
-         return full_critique
-
-
- def _parse_llm_score(llm_text_output: str) -> int:
-     # ... (keep your existing _parse_score_from_llm_text, renamed for clarity) ...
-     score = 0
-     if not llm_text_output or not isinstance(llm_text_output, str): return score
-     try:
-         import re
-         match = re.search(r"Score:\s*(\d+)(?:\s*/\s*10)?", llm_text_output, re.IGNORECASE)
-         if match:
-             parsed_score_val = int(match.group(1))
-             score = max(1, min(parsed_score_val, 10))
-         else: score = random.randint(3, 6) # Fallback if no score marker
-     except Exception: score = random.randint(3, 5) # Fallback on any parsing error
-     return score
-
-
- def evaluate_solution_candidate(
-     solution_text: str,
      problem_description: str,
      problem_type: str,
-     user_provided_tests_code: str,
-     llm_client_config: dict
- ) -> EvaluationResultOutput:
-     llm_critique_text = "LLM critique generation failed or was skipped."
-     llm_score = 0
-     raw_llm_critique_resp = None
-     execution_result_obj = None # type: ExecutionResult
-
-     # 1. LLM-based Critique (only if solution_text is not an error itself)
-     if solution_text and not solution_text.startswith("ERROR"):
-         system_p_critique = get_system_prompt("critique_general")
-         user_p_critique = format_critique_user_prompt(problem_description, solution_text)
-
-         llm_response_obj = None
-         if llm_client_config["type"] == "hf":
-             llm_response_obj = call_huggingface_api(user_p_critique, llm_client_config["model_id"], llm_client_config["temp"], llm_client_config["max_tokens"], system_p_critique)
-         elif llm_client_config["type"] == "google_gemini":
-             llm_response_obj = call_gemini_api(user_p_critique, llm_client_config["model_id"], llm_client_config["temp"], llm_client_config["max_tokens"], system_p_critique)
-
-         if llm_response_obj:
-             raw_llm_critique_resp = llm_response_obj.raw_response
-             if llm_response_obj.success:
-                 llm_critique_text = llm_response_obj.text
-                 llm_score = _parse_llm_score(llm_critique_text)
-             else:
-                 llm_critique_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
-                 llm_score = 0 # Penalize
-     elif solution_text and solution_text.startswith("ERROR"):
-         llm_critique_text = f"Solution was an error from Genesis: {solution_text}"
-         llm_score = 0
-
-     # 2. Code Execution (if Python problem, code exists, and tests are provided)
-     if "python" in problem_type.lower() and solution_text and not solution_text.startswith("ERROR") and user_provided_tests_code.strip():
-         print(f"INFO: evaluation_engine.py - Preparing to execute Python code candidate against user tests.")
-         # Use the (simulated) safe executor
-         execution_result_obj = execute_python_code_with_tests(
-             solution_text, user_provided_tests_code, timeout_seconds=10 # Example timeout
          )
-         print(f"INFO: evaluation_engine.py - Execution result: {execution_result_obj}")
-     elif "python" in problem_type.lower() and not user_provided_tests_code.strip():
-         execution_result_obj = ExecutionResult(success=True, output="No user tests provided to run against the Python code.", total_tests=0)
-
-     # 3. Combine Scores into a Final Score (More sophisticated heuristic)
-     combined_score = llm_score
-     if execution_result_obj and execution_result_obj.total_tests > 0:
-         if not execution_result_obj.success or execution_result_obj.error: # Major execution failure
-             combined_score = max(1, llm_score - 5) # Penalize heavily
-         else:
-             pass_ratio = execution_result_obj.passed_tests / execution_result_obj.total_tests
-             if pass_ratio == 1.0: # All tests passed
-                 combined_score = min(10, llm_score + 2) # Significant bonus
-             elif pass_ratio >= 0.75: # Most tests passed
-                 combined_score = min(10, llm_score + 1) # Small bonus
-             elif pass_ratio < 0.25: # Very few tests passed
-                 combined_score = max(1, llm_score - 4)
-             else: # Some tests passed
-                 combined_score = int(llm_score * (0.5 + 0.5 * pass_ratio)) # Weighted average
-
-     combined_score = max(1, min(10, combined_score)) # Clamp 1-10
-
-     return EvaluationResultOutput(
-         combined_score=combined_score,
-         llm_critique_text=llm_critique_text,
-         execution_details=execution_result_obj,
-         raw_llm_response=raw_llm_critique_resp
-     )
 
+ # algoforge_prime/core/evolution_engine.py
+ print("DEBUG: Importing core.evolution_engine")
+
+ # --- Corrected Imports ---
+ # Absolute imports for modules outside the 'core' package
+ from prompts.system_prompts import get_system_prompt
+
+ # Absolute imports for other modules within the 'core' package (or relative for siblings)
+ from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
+
+ # Relative import for a sibling module within the 'core' package
+ # from .safe_executor import ExecutionResult # Not directly used in this module, but evaluation_output_obj might contain it
+ # from .evaluation_engine import EvaluationResultOutput # For type hinting the parameter
+
+ print("DEBUG: core.evolution_engine - Imports successful")
+
+ def evolve_solution(
+     original_solution_text: str,
+     evaluation_output_obj, # This object comes from evaluation_engine and contains EvaluationResultOutput
+                            # It will have a .get_display_critique() method and .combined_score attribute
      problem_description: str,
      problem_type: str,
+     llm_client_config: dict # {"type": ..., "model_id": ..., "temp": ..., "max_tokens": ...}
+ ) -> str: # Returns evolved solution text or an error string
+     """
+     Attempts to evolve a solution based on its comprehensive evaluation details.
+     """
+     print(f"DEBUG: evolution_engine.py - Evolving solution. Problem type: {problem_type}")
+     system_p_evolve = get_system_prompt("evolution_general") # problem_type can be used for specialization here too
+
+     # Extract necessary info from the evaluation_output_obj
+     # This assumes evaluation_output_obj is an instance of EvaluationResultOutput from evaluation_engine.py
+     # or at least has these attributes/methods.
+     try:
+         critique_and_test_feedback = evaluation_output_obj.get_display_critique()
+         original_score = evaluation_output_obj.combined_score
+     except AttributeError as e:
+         print(f"ERROR: evolution_engine.py - evaluation_output_obj is missing expected attributes/methods: {e}")
+         # Fallback if the object structure is not as expected
+         critique_and_test_feedback = "Critique data was not in the expected format."
+         original_score = 0 # Assign a neutral score if real one can't be found
+
+     user_p_evolve = (
+         f"Original Problem Context: \"{problem_description}\"\n\n"
+         f"The solution to be evolved achieved a combined score of {original_score}/10.\n"
+         f"Here is the original solution text:\n```python\n{original_solution_text}\n```\n\n"
+         f"Here is the comprehensive evaluation it received (including LLM critique and automated test feedback if run):\n'''\n{critique_and_test_feedback}\n'''\n\n"
+         f"Your Task: Based on ALL the information above (solution, LLM critique, and crucially any test execution results/errors mentioned in the evaluation), "
+         f"evolve the provided solution to make it demonstrably superior. "
+         f"Prioritize fixing any reported execution errors or failed tests. "
+         f"Then, address other critique points like efficiency, clarity, or completeness. "
+         f"Output the *complete evolved solution*. "
+         f"Follow this with a brief explanation of the key changes and improvements you implemented, especially how you addressed test failures or execution issues."
+     )
+
+     llm_response_obj = None # type: LLMResponse
+     if llm_client_config["type"] == "hf":
+         llm_response_obj = call_huggingface_api(
+             user_p_evolve, llm_client_config["model_id"],
+             temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+             system_prompt_text=system_p_evolve
          )
+     elif llm_client_config["type"] == "google_gemini":
+         llm_response_obj = call_gemini_api(
+             user_p_evolve, llm_client_config["model_id"],
+             temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+             system_prompt_text=system_p_evolve
+         )
+     else:
+         error_msg = f"ERROR (Evolution): Unknown LLM client type '{llm_client_config['type']}'"
+         print(f"ERROR: evolution_engine.py - {error_msg}")
+         return error_msg
+
+     if llm_response_obj.success:
+         return llm_response_obj.text
+     else:
+         # Error is already logged by call_..._api functions if it's from there
+         return f"ERROR (Evolution with {llm_response_obj.model_id_used}): {llm_response_obj.error}"
+
+ print("DEBUG: core.evolution_engine - Module fully defined.")