mgbam committed on
Commit b09495b · verified · 1 Parent(s): 9830d96

Update core/generation_engine.py

Files changed (1)
  1. core/generation_engine.py +97 -114
core/generation_engine.py CHANGED
@@ -1,125 +1,108 @@
-# algoforge_prime/core/evaluation_engine.py
-import random
-# (Keep your placeholder _placeholder_safe_python_execution as is)
-
-from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
-from prompts.system_prompts import get_system_prompt
-from prompts.prompt_templates import format_genesis_user_prompt
-
-class EvaluationResult: # Keep this class definition
-    def __init__(self, score=0, critique_text="", passed_tests=0, total_tests=0, execution_summary=None, raw_llm_critique_response=None):
-        self.score = score
-        self.critique_text = critique_text
-        self.passed_tests = passed_tests
-        self.total_tests = total_tests
-        self.execution_summary = execution_summary
-        self.raw_llm_critique_response = raw_llm_critique_response
-
-    def __str__(self):
-        return f"Score: {self.score}/10. Tests: {self.passed_tests}/{self.total_tests}. Summary: {self.execution_summary}. Critique: {self.critique_text[:100]}..."
-
-def _parse_score_from_llm_text(llm_text_output: str) -> int: # Keep this helper
-    # ... (implementation as before) ...
-    score = 0
-    if not llm_text_output or not isinstance(llm_text_output, str): return score
-    try:
-        import re
-        match = re.search(r"Score:\s*(\d+)(?:\s*/\s*10)?", llm_text_output, re.IGNORECASE)
-        if match:
-            parsed_score_val = int(match.group(1))
-            score = max(1, min(parsed_score_val, 10))
-        else:
-            score = random.randint(3, 6)
-    except Exception: score = random.randint(3, 5)
-    return score
-
-def _placeholder_safe_python_execution(code_string: str, user_tests_string: str) -> tuple[int, int, str]: # Keep this placeholder
-    # ... (implementation as before) ...
-    print(f"DEV_INFO: evaluation_engine.py - Entering PLACEHOLDER for code execution.")
-    if not user_tests_string.strip() or not code_string.strip(): return 0, 0, "SIMULATED: No tests/code."
-    test_lines = [line.strip() for line in user_tests_string.splitlines() if line.strip().startswith("assert")]
-    total_tests_found = len(test_lines)
-    if total_tests_found == 0: return 0, 0, "SIMULATED: No 'assert' statements."
-    passed_count = random.randint(total_tests_found // 2, total_tests_found) # Simulate some passing
-    summary = f"Simulated: {passed_count}/{total_tests_found} tests passed."
-    if passed_count < total_tests_found: summary += " Some tests likely failed."
-    return passed_count, total_tests_found, summary
-
-def evaluate_solution_candidate(
-    solution_text: str,
-    problem_description: str,
-    problem_type: str,
-    user_provided_tests: str,
-    llm_client_config: dict
-) -> EvaluationResult:
-    llm_critique_output_text = "LLM critique could not be performed."
-    llm_based_score = 0
-    raw_llm_critique_resp = None
-
-    if solution_text and not solution_text.startswith("ERROR"):
-        system_p_critique = get_system_prompt("critique_general")
-        user_p_critique = format_critique_user_prompt(problem_description, solution_text)
-
-        llm_response_obj = None # type: LLMResponse
-        if llm_client_config["type"] == "hf":
-            llm_response_obj = call_huggingface_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-        elif llm_client_config["type"] == "google_gemini":
-            llm_response_obj = call_gemini_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-
-        if llm_response_obj:
-            raw_llm_critique_resp = llm_response_obj.raw_response
-            if llm_response_obj.success:
-                llm_critique_output_text = llm_response_obj.text
-                llm_based_score = _parse_score_from_llm_text(llm_critique_output_text)
-            else:
-                llm_critique_output_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
-                llm_based_score = 0
-    elif solution_text and solution_text.startswith("ERROR"):
-        llm_critique_output_text = f"Solution was an error from Genesis: {solution_text}"
-        llm_based_score = 0
-
-    passed_tests_count = 0
-    total_tests_count = 0
-    exec_summary_msg = "Automated tests not applicable or not run."
-
-    if "python" in problem_type.lower() and user_provided_tests.strip() and solution_text and not solution_text.startswith("ERROR"):
-        passed_tests_count, total_tests_count, exec_summary_msg = _placeholder_safe_python_execution(
-            solution_text, user_provided_tests
-        )
-    elif "python" in problem_type.lower() and not user_provided_tests.strip():
-        exec_summary_msg = "No user tests provided for this Python problem."
-
-    final_score_calculated = llm_based_score
-    if total_tests_count > 0:
-        test_pass_ratio = passed_tests_count / total_tests_count
-        if test_pass_ratio < 0.5 :
-            final_score_calculated = max(1, int(llm_based_score * 0.5) - 1)
-        elif test_pass_ratio == 1.0 and passed_tests_count > 0:
-            final_score_calculated = min(10, llm_based_score + 1 if llm_based_score < 10 else 10)
-        else:
-            final_score_calculated = int(llm_based_score * (0.6 + 0.4 * test_pass_ratio))
-        final_score_calculated = max(1, min(10, final_score_calculated))
-
-    comprehensive_critique = f"{llm_critique_output_text}"
-    if total_tests_count > 0 or ("python" in problem_type.lower() and user_provided_tests.strip()):
-        comprehensive_critique += f"\n\n**Automated Test Summary (Simulated):**\n{exec_summary_msg}\n"
-        comprehensive_critique += f"Passed: {passed_tests_count}/{total_tests_count}"
-
-    return EvaluationResult(
-        score=final_score_calculated,
-        critique_text=comprehensive_critique,
-        passed_tests=passed_tests_count,
-        total_tests=total_tests_count,
-        execution_summary=exec_summary_msg,
-        raw_llm_critique_response=raw_llm_critique_resp
-    )
+# algoforge_prime/core/generation_engine.py
+print("DEBUG: Importing core.generation_engine") # For checking if this file is reached
+
+try:
+    # Using absolute imports assuming 'algoforge_prime' (containing 'core' and 'prompts')
+    # is the top-level package context when app.py runs.
+    from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
+    from prompts.system_prompts import get_system_prompt
+    from prompts.prompt_templates import format_genesis_user_prompt
+    print("DEBUG: core.generation_engine - Imports successful")
+except ImportError as e:
+    print(f"ERROR: core.generation_engine - ImportError during its own imports: {e}")
+    # This exception would likely prevent the rest of the file from defining generate_initial_solutions
+    # and would be the root cause of the error seen in app.py
+    raise # Re-raise to make it obvious in logs if this is the point of failure
+
+def generate_initial_solutions(
+    problem_description: str,
+    initial_hints: str,
+    problem_type: str, # e.g., "Python Algorithm with Tests"
+    num_solutions_to_generate: int,
+    llm_client_config: dict # {"type": "hf" or "google_gemini", "model_id": "...", "temp": ..., "max_tokens": ...}
+) -> list[str]: # Returns a list of strings (solutions or error messages)
+    """
+    Generates a list of initial solution strings using the configured LLM.
+    Returns a list of strings, where each string is either a solution or an error message.
+    """
+    print(f"DEBUG: generate_initial_solutions called with problem_type: {problem_type}, num_solutions: {num_solutions_to_generate}")
+    solutions_or_errors = []
+
+    # Select system prompt based on problem type
+    system_p_key = "genesis_general" # Default system prompt key
+    if "python" in problem_type.lower():
+        system_p_key = "genesis_python"
+
+    try:
+        system_p_genesis = get_system_prompt(system_p_key)
+        if not system_p_genesis: # Check if get_system_prompt returned an empty string (fallback)
+            print(f"WARNING: core.generation_engine - System prompt for key '{system_p_key}' was empty. Proceeding without system prompt for genesis.")
+    except Exception as e:
+        print(f"ERROR: core.generation_engine - Failed to get system prompt: {e}")
+        # Decide how to handle this: proceed without, or return an error for all solutions?
+        # For now, let's log and proceed without a system prompt if it fails.
+        system_p_genesis = None # Or some very generic fallback string
+
+    for i in range(num_solutions_to_generate):
+        print(f"DEBUG: Generating solution candidate {i+1}/{num_solutions_to_generate}")
+        try:
+            user_p_genesis = format_genesis_user_prompt(
+                problem_description, initial_hints, i + 1, num_solutions_to_generate
+            )
+        except Exception as e:
+            print(f"ERROR: core.generation_engine - Failed to format genesis user prompt: {e}")
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Internal error formatting prompt.")
+            continue # Skip to next attempt
+
+        llm_response_obj = None # type: LLMResponse
+
+        if not llm_client_config or "type" not in llm_client_config or "model_id" not in llm_client_config:
+            error_msg = f"ERROR (Genesis Attempt {i+1}): Invalid llm_client_config provided."
+            print(f"CRITICAL_ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue
+
+        try:
+            if llm_client_config["type"] == "hf":
+                llm_response_obj = call_huggingface_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7), # Use .get for safety
+                    max_new_tokens=llm_client_config.get("max_tokens", 512),
+                    system_prompt_text=system_p_genesis
+                )
+            elif llm_client_config["type"] == "google_gemini":
+                llm_response_obj = call_gemini_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7),
+                    max_new_tokens=llm_client_config.get("max_tokens", 768),
+                    system_prompt_text=system_p_genesis
+                )
+            else:
+                solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown LLM client type '{llm_client_config['type']}'")
+                continue # Skip to next attempt
+        except Exception as e_call:
+            # This catch block is crucial if call_..._api functions themselves raise exceptions
+            # before returning an LLMResponse object (though they are designed to return LLMResponse(error=...))
+            error_msg = f"ERROR (Genesis Attempt {i+1} calling LLM {llm_client_config['model_id']}): Exception during API call: {type(e_call).__name__} - {str(e_call)}"
+            print(f"ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue
+
+        if llm_response_obj and llm_response_obj.success:
+            solutions_or_errors.append(llm_response_obj.text if llm_response_obj.text is not None else "")
+            print(f"DEBUG: Solution candidate {i+1} generated successfully (Model: {llm_response_obj.model_id_used}).")
+        elif llm_response_obj: # Error occurred and was encapsulated in LLMResponse
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1} with {llm_response_obj.model_id_used}): {llm_response_obj.error}")
+            print(f"DEBUG: Solution candidate {i+1} FAILED with error from LLMResponse (Model: {llm_response_obj.model_id_used}). Error: {llm_response_obj.error}")
+        else: # Should ideally not happen if LLMResponse is always returned from call_..._api
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown error, LLM response object was None.")
+            print(f"CRITICAL_DEBUG: Solution candidate {i+1} - LLM response object was None. This indicates an issue in call_..._api not returning an LLMResponse object.")
+
+    print(f"DEBUG: generate_initial_solutions finished. Returning {len(solutions_or_errors)} items.")
+    return solutions_or_errors
+
+# A print statement at the end of the module definition
+print("DEBUG: core.generation_engine - Module fully defined, including generate_initial_solutions.")