Update core/generation_engine.py

core/generation_engine.py  CHANGED  (+97 -114)

@@ -1,125 +1,108 @@
-# algoforge_prime/core/
-
-# (Keep your placeholder _placeholder_safe_python_execution as is)
 try:
-        return score
-    return passed_count, total_tests_found, summary
-
-def evaluate_solution_candidate(
-    solution_text: str,
-    problem_description: str,
-    problem_type: str,
-    user_provided_tests: str,
-    llm_client_config: dict
-) -> EvaluationResult:
-    llm_critique_output_text = "LLM critique could not be performed."
-    llm_based_score = 0
-    raw_llm_critique_resp = None
-
-    if solution_text and not solution_text.startswith("ERROR"):
-        system_p_critique = get_system_prompt("critique_general")
-        user_p_critique = format_critique_user_prompt(problem_description, solution_text)
-
         llm_response_obj = None  # type: LLMResponse
-        if llm_client_config["type"] == "hf":
-            llm_response_obj = call_huggingface_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-        elif llm_client_config["type"] == "google_gemini":
-            llm_response_obj = call_gemini_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-
-        if
-        else:
-            llm_critique_output_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
-            llm_based_score = 0
-    elif solution_text and solution_text.startswith("ERROR"):
-        llm_critique_output_text = f"Solution was an error from Genesis: {solution_text}"
-        llm_based_score = 0
-
-    passed_tests_count = 0
-    total_tests_count = 0
-    exec_summary_msg = "Automated tests not applicable or not run."
-
-    final_score_calculated = llm_based_score
-    if total_tests_count > 0:
-        test_pass_ratio = passed_tests_count / total_tests_count
-        if test_pass_ratio < 0.5:
-            final_score_calculated = max(1, int(llm_based_score * 0.5) - 1)
-        elif test_pass_ratio == 1.0 and passed_tests_count > 0:
-            final_score_calculated = min(10, llm_based_score + 1 if llm_based_score < 10 else 10)
-        else:
-            final_score_calculated = int(llm_based_score * (0.6 + 0.4 * test_pass_ratio))
-        final_score_calculated = max(1, min(10, final_score_calculated))
-
-        critique_text=comprehensive_critique,
-        passed_tests=passed_tests_count,
-        total_tests=total_tests_count,
-        execution_summary=exec_summary_msg,
-        raw_llm_critique_response=raw_llm_critique_resp
-    )
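The removed evaluate_solution_candidate blended a 1-10 LLM critique score with the automated test pass ratio before returning an EvaluationResult. A minimal standalone sketch of that blending rule, reconstructed from the removed lines above with shortened names for illustration:

def blend_score(llm_score: int, passed: int, total: int) -> int:
    # Blend a 1-10 LLM critique score with a unit-test pass ratio (logic from the removed code).
    final = llm_score
    if total > 0:
        ratio = passed / total
        if ratio < 0.5:
            final = max(1, int(llm_score * 0.5) - 1)   # heavy penalty when most tests fail
        elif ratio == 1.0 and passed > 0:
            final = min(10, llm_score + 1 if llm_score < 10 else 10)  # small bonus for a clean pass
        else:
            final = int(llm_score * (0.6 + 0.4 * ratio))  # proportional blend otherwise
        final = max(1, min(10, final))  # clamp to the 1-10 range
    return final

# Example: an LLM score of 8 with 3 of 4 tests passing yields int(8 * (0.6 + 0.4 * 0.75)) = 7.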
+# algoforge_prime/core/generation_engine.py
+print("DEBUG: Importing core.generation_engine")  # For checking if this file is reached

+try:
+    # Using absolute imports assuming 'algoforge_prime' (containing 'core' and 'prompts')
+    # is the top-level package context when app.py runs.
+    from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
+    from prompts.system_prompts import get_system_prompt
+    from prompts.prompt_templates import format_genesis_user_prompt
+    print("DEBUG: core.generation_engine - Imports successful")
+except ImportError as e:
+    print(f"ERROR: core.generation_engine - ImportError during its own imports: {e}")
+    # This exception would prevent the rest of the file from defining generate_initial_solutions
+    # and would be the root cause of the error seen in app.py.
+    raise  # Re-raise to make it obvious in logs if this is the point of failure

+def generate_initial_solutions(
+    problem_description: str,
+    initial_hints: str,
+    problem_type: str,  # e.g., "Python Algorithm with Tests"
+    num_solutions_to_generate: int,
+    llm_client_config: dict  # {"type": "hf" or "google_gemini", "model_id": "...", "temp": ..., "max_tokens": ...}
+) -> list[str]:  # Returns a list of strings (solutions or error messages)
+    """
+    Generates a list of initial solution strings using the configured LLM.
+    Returns a list of strings, where each string is either a solution or an error message.
+    """
+    print(f"DEBUG: generate_initial_solutions called with problem_type: {problem_type}, num_solutions: {num_solutions_to_generate}")
+    solutions_or_errors = []
+
+    # Select the system prompt based on problem type.
+    system_p_key = "genesis_general"  # Default system prompt key
+    if "python" in problem_type.lower():
+        system_p_key = "genesis_python"
+
    try:
+        system_p_genesis = get_system_prompt(system_p_key)
+        if not system_p_genesis:  # get_system_prompt returned an empty string (fallback)
+            print(f"WARNING: core.generation_engine - System prompt for key '{system_p_key}' was empty. Proceeding without system prompt for genesis.")
+    except Exception as e:
+        print(f"ERROR: core.generation_engine - Failed to get system prompt: {e}")
+        # Decide how to handle this: proceed without, or return an error for all solutions?
+        # For now, log and proceed without a system prompt if it fails.
+        system_p_genesis = None  # Or some very generic fallback string

+    for i in range(num_solutions_to_generate):
+        print(f"DEBUG: Generating solution candidate {i+1}/{num_solutions_to_generate}")
+        try:
+            user_p_genesis = format_genesis_user_prompt(
+                problem_description, initial_hints, i + 1, num_solutions_to_generate
+            )
+        except Exception as e:
+            print(f"ERROR: core.generation_engine - Failed to format genesis user prompt: {e}")
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Internal error formatting prompt.")
+            continue  # Skip to next attempt

        llm_response_obj = None  # type: LLMResponse

+        if not llm_client_config or "type" not in llm_client_config or "model_id" not in llm_client_config:
+            error_msg = f"ERROR (Genesis Attempt {i+1}): Invalid llm_client_config provided."
+            print(f"CRITICAL_ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue

+        try:
+            if llm_client_config["type"] == "hf":
+                llm_response_obj = call_huggingface_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7),  # Use .get for safety
+                    max_new_tokens=llm_client_config.get("max_tokens", 512),
+                    system_prompt_text=system_p_genesis
+                )
+            elif llm_client_config["type"] == "google_gemini":
+                llm_response_obj = call_gemini_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7),
+                    max_new_tokens=llm_client_config.get("max_tokens", 768),
+                    system_prompt_text=system_p_genesis
+                )
+            else:
+                solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown LLM client type '{llm_client_config['type']}'")
+                continue  # Skip to next attempt
+        except Exception as e_call:
+            # This catch is crucial if the call_..._api functions themselves raise exceptions
+            # before returning an LLMResponse object (though they are designed to return LLMResponse(error=...)).
+            error_msg = f"ERROR (Genesis Attempt {i+1} calling LLM {llm_client_config['model_id']}): Exception during API call: {type(e_call).__name__} - {str(e_call)}"
+            print(f"ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue

+        if llm_response_obj and llm_response_obj.success:
+            solutions_or_errors.append(llm_response_obj.text if llm_response_obj.text is not None else "")
+            print(f"DEBUG: Solution candidate {i+1} generated successfully (Model: {llm_response_obj.model_id_used}).")
+        elif llm_response_obj:  # Error occurred and was encapsulated in the LLMResponse
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1} with {llm_response_obj.model_id_used}): {llm_response_obj.error}")
+            print(f"DEBUG: Solution candidate {i+1} FAILED with error from LLMResponse (Model: {llm_response_obj.model_id_used}). Error: {llm_response_obj.error}")
+        else:  # Should not happen if an LLMResponse is always returned from call_..._api
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown error, LLM response object was None.")
+            print(f"CRITICAL_DEBUG: Solution candidate {i+1} - LLM response object was None. This indicates an issue in call_..._api not returning an LLMResponse object.")
+
+    print(f"DEBUG: generate_initial_solutions finished. Returning {len(solutions_or_errors)} items.")
+    return solutions_or_errors

+# A print statement at the end of the module definition.
+print("DEBUG: core.generation_engine - Module fully defined, including generate_initial_solutions.")
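The new module only reads four attributes off the object returned by call_huggingface_api and call_gemini_api: success, text, error, and model_id_used. The real class lives in core.llm_clients and is not part of this diff; a minimal sketch of the interface this file assumes (field defaults here are hypothetical) would be:

from dataclasses import dataclass
from typing import Optional

@dataclass
class LLMResponse:
    # Only the fields read by generate_initial_solutions; the real class in core.llm_clients may carry more.
    success: bool
    text: Optional[str] = None
    error: Optional[str] = None
    model_id_used: str = "unknown"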
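generate_initial_solutions treats an empty string from get_system_prompt as a "missing prompt" fallback and merely logs a warning. A hypothetical sketch of a registry that behaves that way (the real prompts/system_prompts.py and its prompt texts are not shown in this diff):

# Hypothetical registry; the keys mirror the ones used above ("genesis_general", "genesis_python").
_SYSTEM_PROMPTS = {
    "genesis_general": "You are an expert problem solver. Produce a complete, well-reasoned solution.",
    "genesis_python": "You are an expert Python developer. Produce clean, working, commented Python code.",
}

def get_system_prompt(key: str) -> str:
    # Returning "" (rather than raising) on an unknown key is exactly what the warning branch above expects.
    return _SYSTEM_PROMPTS.get(key, "")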
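A minimal sketch of how app.py might invoke the new function, assuming the llm_client_config shape documented in the signature comment; the model id and parameter values below are placeholders, not values taken from this repo:

from core.generation_engine import generate_initial_solutions

# Hypothetical config: only "type" and "model_id" are required by the validation above;
# "temp" and "max_tokens" fall back to the .get() defaults if omitted.
llm_client_config = {
    "type": "google_gemini",          # or "hf"
    "model_id": "gemini-1.5-flash",   # placeholder model id
    "temp": 0.7,
    "max_tokens": 768,
}

candidates = generate_initial_solutions(
    problem_description="Write a function that returns the n-th Fibonacci number.",
    initial_hints="Prefer an iterative solution.",
    problem_type="Python Algorithm with Tests",
    num_solutions_to_generate=3,
    llm_client_config=llm_client_config,
)

# Each entry is either generated solution text or a string starting with "ERROR (Genesis Attempt ...)".
for idx, candidate in enumerate(candidates, start=1):
    status = "error" if candidate.startswith("ERROR") else "ok"
    print(f"candidate {idx}: {status}")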