mgbam committed on
Commit 993e62d · verified · 1 Parent(s): 4e61147

Update core/generation_engine.py

Files changed (1)
  1. core/generation_engine.py +113 -34
core/generation_engine.py CHANGED
@@ -1,46 +1,125 @@
- # algoforge_prime/core/generation_engine.py
  from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse # Changed to absolute
  from prompts.system_prompts import get_system_prompt # Changed to absolute
- from prompts.prompt_templates import format_genesis_user_prompt # Changed to absolute
-
- def generate_initial_solutions(
-     problem_description,
-     initial_hints,
-     problem_type, # e.g., "Python Algorithm with Tests"
-     num_solutions_to_generate,
-     llm_client_config # Dict: {"type": ..., "model_id": ..., "temp": ..., "max_tokens": ...}
- ):
-     solutions_or_errors = []
-     system_p_key = "genesis_general"
-     if "python" in problem_type.lower():
-         system_p_key = "genesis_python"
-     system_p_genesis = get_system_prompt(system_p_key) # Uses the imported function
-
-     for i in range(num_solutions_to_generate):
-         user_p_genesis = format_genesis_user_prompt( # Uses the imported function
-             problem_description, initial_hints, i + 1, num_solutions_to_generate
-         )

          llm_response_obj = None # type: LLMResponse
          if llm_client_config["type"] == "hf":
-             llm_response_obj = call_huggingface_api( # Uses the imported function
-                 user_p_genesis, llm_client_config["model_id"],
                  temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                 system_prompt_text=system_p_genesis
              )
          elif llm_client_config["type"] == "google_gemini":
-             llm_response_obj = call_gemini_api( # Uses the imported function
-                 user_p_genesis, llm_client_config["model_id"],
                  temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                 system_prompt_text=system_p_genesis
              )
-         else:
-             solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown LLM client type '{llm_client_config['type']}'")
-             continue

-         if llm_response_obj.success:
-             solutions_or_errors.append(llm_response_obj.text)
          else:
-             solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1} with {llm_response_obj.model_id_used}): {llm_response_obj.error}")
-
-     return solutions_or_errors
+ # algoforge_prime/core/evaluation_engine.py
+ import random
+ # (Keep your placeholder _placeholder_safe_python_execution as is)
+
  from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse # Changed to absolute
  from prompts.system_prompts import get_system_prompt # Changed to absolute
+ from prompts.prompt_templates import format_critique_user_prompt # Changed to absolute
+
+ class EvaluationResult: # Keep this class definition
+     def __init__(self, score=0, critique_text="", passed_tests=0, total_tests=0, execution_summary=None, raw_llm_critique_response=None):
+         self.score = score
+         self.critique_text = critique_text
+         self.passed_tests = passed_tests
+         self.total_tests = total_tests
+         self.execution_summary = execution_summary
+         self.raw_llm_critique_response = raw_llm_critique_response
+
+     def __str__(self):
+         return f"Score: {self.score}/10. Tests: {self.passed_tests}/{self.total_tests}. Summary: {self.execution_summary}. Critique: {self.critique_text[:100]}..."
+
+ def _parse_score_from_llm_text(llm_text_output: str) -> int: # Keep this helper
+     # ... (implementation as before) ...
+     score = 0
+     if not llm_text_output or not isinstance(llm_text_output, str): return score
+     try:
+         import re
+         match = re.search(r"Score:\s*(\d+)(?:\s*/\s*10)?", llm_text_output, re.IGNORECASE)
+         if match:
+             parsed_score_val = int(match.group(1))
+             score = max(1, min(parsed_score_val, 10))
+         else:
+             score = random.randint(3, 6)
+     except Exception: score = random.randint(3, 5)
+     return score
+
+
+ def _placeholder_safe_python_execution(code_string: str, user_tests_string: str) -> tuple[int, int, str]: # Keep this placeholder
+     # ... (implementation as before) ...
+     print(f"DEV_INFO: evaluation_engine.py - Entering PLACEHOLDER for code execution.")
+     if not user_tests_string.strip() or not code_string.strip(): return 0, 0, "SIMULATED: No tests/code."
+     test_lines = [line.strip() for line in user_tests_string.splitlines() if line.strip().startswith("assert")]
+     total_tests_found = len(test_lines)
+     if total_tests_found == 0: return 0, 0, "SIMULATED: No 'assert' statements."
+     passed_count = random.randint(total_tests_found // 2, total_tests_found) # Simulate some passing
+     summary = f"Simulated: {passed_count}/{total_tests_found} tests passed."
+     if passed_count < total_tests_found: summary += " Some tests likely failed."
+     return passed_count, total_tests_found, summary
+
+
+ def evaluate_solution_candidate(
+     solution_text: str,
+     problem_description: str,
+     problem_type: str,
+     user_provided_tests: str,
+     llm_client_config: dict
+ ) -> EvaluationResult:
+     llm_critique_output_text = "LLM critique could not be performed."
+     llm_based_score = 0
+     raw_llm_critique_resp = None
+
+     if solution_text and not solution_text.startswith("ERROR"):
+         system_p_critique = get_system_prompt("critique_general")
+         user_p_critique = format_critique_user_prompt(problem_description, solution_text)

          llm_response_obj = None # type: LLMResponse
          if llm_client_config["type"] == "hf":
+             llm_response_obj = call_huggingface_api(
+                 user_p_critique, llm_client_config["model_id"],
                  temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+                 system_prompt_text=system_p_critique
              )
          elif llm_client_config["type"] == "google_gemini":
+             llm_response_obj = call_gemini_api(
+                 user_p_critique, llm_client_config["model_id"],
                  temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
+                 system_prompt_text=system_p_critique
              )

+         if llm_response_obj:
+             raw_llm_critique_resp = llm_response_obj.raw_response
+             if llm_response_obj.success:
+                 llm_critique_output_text = llm_response_obj.text
+                 llm_based_score = _parse_score_from_llm_text(llm_critique_output_text)
+             else:
+                 llm_critique_output_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
+                 llm_based_score = 0
+     elif solution_text and solution_text.startswith("ERROR"):
+         llm_critique_output_text = f"Solution was an error from Genesis: {solution_text}"
+         llm_based_score = 0
+
+     passed_tests_count = 0
+     total_tests_count = 0
+     exec_summary_msg = "Automated tests not applicable or not run."
+
+     if "python" in problem_type.lower() and user_provided_tests.strip() and solution_text and not solution_text.startswith("ERROR"):
+         passed_tests_count, total_tests_count, exec_summary_msg = _placeholder_safe_python_execution(
+             solution_text, user_provided_tests
+         )
+     elif "python" in problem_type.lower() and not user_provided_tests.strip():
+         exec_summary_msg = "No user tests provided for this Python problem."
+
+     final_score_calculated = llm_based_score
+     if total_tests_count > 0:
+         test_pass_ratio = passed_tests_count / total_tests_count
+         if test_pass_ratio < 0.5:
+             final_score_calculated = max(1, int(llm_based_score * 0.5) - 1)
+         elif test_pass_ratio == 1.0 and passed_tests_count > 0:
+             final_score_calculated = min(10, llm_based_score + 1 if llm_based_score < 10 else 10)
          else:
+             final_score_calculated = int(llm_based_score * (0.6 + 0.4 * test_pass_ratio))
+         final_score_calculated = max(1, min(10, final_score_calculated))
+
+     comprehensive_critique = f"{llm_critique_output_text}"
+     if total_tests_count > 0 or ("python" in problem_type.lower() and user_provided_tests.strip()):
+         comprehensive_critique += f"\n\n**Automated Test Summary (Simulated):**\n{exec_summary_msg}\n"
+         comprehensive_critique += f"Passed: {passed_tests_count}/{total_tests_count}"
+
+     return EvaluationResult(
+         score=final_score_calculated,
+         critique_text=comprehensive_critique,
+         passed_tests=passed_tests_count,
+         total_tests=total_tests_count,
+         execution_summary=exec_summary_msg,
+         raw_llm_critique_response=raw_llm_critique_resp
+     )
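For reference, the score-blending rule that evaluate_solution_candidate applies after the simulated test run can be read in isolation. The sketch below restates it as a standalone function; the name blend_scores and the example inputs are illustrative only, while the thresholds and arithmetic mirror the committed code.

# Illustrative sketch only: restates the score-blending rule from
# evaluate_solution_candidate above. blend_scores is a hypothetical name;
# the thresholds and formula follow the committed code.
def blend_scores(llm_based_score: int, passed_tests: int, total_tests: int) -> int:
    final_score = llm_based_score
    if total_tests > 0:
        ratio = passed_tests / total_tests
        if ratio < 0.5:
            # Fewer than half the tests pass: roughly halve the LLM score, minus one.
            final_score = max(1, int(llm_based_score * 0.5) - 1)
        elif ratio == 1.0 and passed_tests > 0:
            # Every test passes: bump the LLM score by one, capped at 10.
            final_score = min(10, llm_based_score + 1)
        else:
            # Partial pass: scale the LLM score between 60% and 100%.
            final_score = int(llm_based_score * (0.6 + 0.4 * ratio))
        final_score = max(1, min(10, final_score))
    return final_score

# Worked examples with a hypothetical LLM critique score of 7:
print(blend_scores(7, 1, 4))  # 2 -> under half passing roughly halves the score
print(blend_scores(7, 3, 4))  # 6 -> int(7 * (0.6 + 0.4 * 0.75)) == 6
print(blend_scores(7, 4, 4))  # 8 -> full pass bumps the score by one

Note that the llm_client_config dict consumed here has the same shape as in the old genesis path: "type", "model_id", "temp", and "max_tokens" keys.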