mgbam committed on
Commit b09495b · verified · 1 Parent(s): 9830d96

Update core/generation_engine.py

Files changed (1)
  1. core/generation_engine.py +97 -114
core/generation_engine.py CHANGED
@@ -1,125 +1,108 @@
-# algoforge_prime/core/evaluation_engine.py
-import random
-# (Keep your placeholder _placeholder_safe_python_execution as is)
-
-from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
-from prompts.system_prompts import get_system_prompt
-from prompts.prompt_templates import format_genesis_user_prompt
-
-class EvaluationResult: # Keep this class definition
-    def __init__(self, score=0, critique_text="", passed_tests=0, total_tests=0, execution_summary=None, raw_llm_critique_response=None):
-        self.score = score
-        self.critique_text = critique_text
-        self.passed_tests = passed_tests
-        self.total_tests = total_tests
-        self.execution_summary = execution_summary
-        self.raw_llm_critique_response = raw_llm_critique_response
-
-    def __str__(self):
-        return f"Score: {self.score}/10. Tests: {self.passed_tests}/{self.total_tests}. Summary: {self.execution_summary}. Critique: {self.critique_text[:100]}..."
-
-def _parse_score_from_llm_text(llm_text_output: str) -> int: # Keep this helper
-    # ... (implementation as before) ...
-    score = 0
-    if not llm_text_output or not isinstance(llm_text_output, str): return score
-    try:
-        import re
-        match = re.search(r"Score:\s*(\d+)(?:\s*/\s*10)?", llm_text_output, re.IGNORECASE)
-        if match:
-            parsed_score_val = int(match.group(1))
-            score = max(1, min(parsed_score_val, 10))
-        else:
-            score = random.randint(3, 6)
-    except Exception: score = random.randint(3, 5)
-    return score
-
-def _placeholder_safe_python_execution(code_string: str, user_tests_string: str) -> tuple[int, int, str]: # Keep this placeholder
-    # ... (implementation as before) ...
-    print(f"DEV_INFO: evaluation_engine.py - Entering PLACEHOLDER for code execution.")
-    if not user_tests_string.strip() or not code_string.strip(): return 0, 0, "SIMULATED: No tests/code."
-    test_lines = [line.strip() for line in user_tests_string.splitlines() if line.strip().startswith("assert")]
-    total_tests_found = len(test_lines)
-    if total_tests_found == 0: return 0, 0, "SIMULATED: No 'assert' statements."
-    passed_count = random.randint(total_tests_found // 2, total_tests_found) # Simulate some passing
-    summary = f"Simulated: {passed_count}/{total_tests_found} tests passed."
-    if passed_count < total_tests_found: summary += " Some tests likely failed."
-    return passed_count, total_tests_found, summary
-
-def evaluate_solution_candidate(
-    solution_text: str,
-    problem_description: str,
-    problem_type: str,
-    user_provided_tests: str,
-    llm_client_config: dict
-) -> EvaluationResult:
-    llm_critique_output_text = "LLM critique could not be performed."
-    llm_based_score = 0
-    raw_llm_critique_resp = None
-
-    if solution_text and not solution_text.startswith("ERROR"):
-        system_p_critique = get_system_prompt("critique_general")
-        user_p_critique = format_critique_user_prompt(problem_description, solution_text)
-
-        llm_response_obj = None # type: LLMResponse
-        if llm_client_config["type"] == "hf":
-            llm_response_obj = call_huggingface_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-        elif llm_client_config["type"] == "google_gemini":
-            llm_response_obj = call_gemini_api(
-                user_p_critique, llm_client_config["model_id"],
-                temperature=llm_client_config["temp"], max_new_tokens=llm_client_config["max_tokens"],
-                system_prompt_text=system_p_critique
-            )
-
-        if llm_response_obj:
-            raw_llm_critique_resp = llm_response_obj.raw_response
-            if llm_response_obj.success:
-                llm_critique_output_text = llm_response_obj.text
-                llm_based_score = _parse_score_from_llm_text(llm_critique_output_text)
-            else:
-                llm_critique_output_text = f"Error during LLM critique (Model: {llm_response_obj.model_id_used}): {llm_response_obj.error}"
-                llm_based_score = 0
-    elif solution_text and solution_text.startswith("ERROR"):
-        llm_critique_output_text = f"Solution was an error from Genesis: {solution_text}"
-        llm_based_score = 0
-
-    passed_tests_count = 0
-    total_tests_count = 0
-    exec_summary_msg = "Automated tests not applicable or not run."
-
-    if "python" in problem_type.lower() and user_provided_tests.strip() and solution_text and not solution_text.startswith("ERROR"):
-        passed_tests_count, total_tests_count, exec_summary_msg = _placeholder_safe_python_execution(
-            solution_text, user_provided_tests
-        )
-    elif "python" in problem_type.lower() and not user_provided_tests.strip():
-        exec_summary_msg = "No user tests provided for this Python problem."
-
-    final_score_calculated = llm_based_score
-    if total_tests_count > 0:
-        test_pass_ratio = passed_tests_count / total_tests_count
-        if test_pass_ratio < 0.5 :
-            final_score_calculated = max(1, int(llm_based_score * 0.5) - 1)
-        elif test_pass_ratio == 1.0 and passed_tests_count > 0:
-            final_score_calculated = min(10, llm_based_score + 1 if llm_based_score < 10 else 10)
-        else:
-            final_score_calculated = int(llm_based_score * (0.6 + 0.4 * test_pass_ratio))
-        final_score_calculated = max(1, min(10, final_score_calculated))
-
-    comprehensive_critique = f"{llm_critique_output_text}"
-    if total_tests_count > 0 or ("python" in problem_type.lower() and user_provided_tests.strip()):
-        comprehensive_critique += f"\n\n**Automated Test Summary (Simulated):**\n{exec_summary_msg}\n"
-        comprehensive_critique += f"Passed: {passed_tests_count}/{total_tests_count}"
-
-    return EvaluationResult(
-        score=final_score_calculated,
-        critique_text=comprehensive_critique,
-        passed_tests=passed_tests_count,
-        total_tests=total_tests_count,
-        execution_summary=exec_summary_msg,
-        raw_llm_critique_response=raw_llm_critique_resp
-    )
+# algoforge_prime/core/generation_engine.py
+print("DEBUG: Importing core.generation_engine") # For checking if this file is reached
+
+try:
+    # Using absolute imports assuming 'algoforge_prime' (containing 'core' and 'prompts')
+    # is the top-level package context when app.py runs.
+    from core.llm_clients import call_huggingface_api, call_gemini_api, LLMResponse
+    from prompts.system_prompts import get_system_prompt
+    from prompts.prompt_templates import format_genesis_user_prompt
+    print("DEBUG: core.generation_engine - Imports successful")
+except ImportError as e:
+    print(f"ERROR: core.generation_engine - ImportError during its own imports: {e}")
+    # This exception would likely prevent the rest of the file from defining generate_initial_solutions
+    # and would be the root cause of the error seen in app.py
+    raise # Re-raise to make it obvious in logs if this is the point of failure
+
+def generate_initial_solutions(
+    problem_description: str,
+    initial_hints: str,
+    problem_type: str, # e.g., "Python Algorithm with Tests"
+    num_solutions_to_generate: int,
+    llm_client_config: dict # {"type": "hf" or "google_gemini", "model_id": "...", "temp": ..., "max_tokens": ...}
+) -> list[str]: # Returns a list of strings (solutions or error messages)
+    """
+    Generates a list of initial solution strings using the configured LLM.
+    Returns a list of strings, where each string is either a solution or an error message.
+    """
+    print(f"DEBUG: generate_initial_solutions called with problem_type: {problem_type}, num_solutions: {num_solutions_to_generate}")
+    solutions_or_errors = []
+
+    # Select system prompt based on problem type
+    system_p_key = "genesis_general" # Default system prompt key
+    if "python" in problem_type.lower():
+        system_p_key = "genesis_python"
+
+    try:
+        system_p_genesis = get_system_prompt(system_p_key)
+        if not system_p_genesis: # Check if get_system_prompt returned an empty string (fallback)
+            print(f"WARNING: core.generation_engine - System prompt for key '{system_p_key}' was empty. Proceeding without system prompt for genesis.")
+    except Exception as e:
+        print(f"ERROR: core.generation_engine - Failed to get system prompt: {e}")
+        # Decide how to handle this: proceed without, or return an error for all solutions?
+        # For now, let's log and proceed without a system prompt if it fails.
+        system_p_genesis = None # Or some very generic fallback string
+
+    for i in range(num_solutions_to_generate):
+        print(f"DEBUG: Generating solution candidate {i+1}/{num_solutions_to_generate}")
+        try:
+            user_p_genesis = format_genesis_user_prompt(
+                problem_description, initial_hints, i + 1, num_solutions_to_generate
+            )
+        except Exception as e:
+            print(f"ERROR: core.generation_engine - Failed to format genesis user prompt: {e}")
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Internal error formatting prompt.")
+            continue # Skip to next attempt
+
+        llm_response_obj = None # type: LLMResponse
+
+        if not llm_client_config or "type" not in llm_client_config or "model_id" not in llm_client_config:
+            error_msg = f"ERROR (Genesis Attempt {i+1}): Invalid llm_client_config provided."
+            print(f"CRITICAL_ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue
+
+        try:
+            if llm_client_config["type"] == "hf":
+                llm_response_obj = call_huggingface_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7), # Use .get for safety
+                    max_new_tokens=llm_client_config.get("max_tokens", 512),
+                    system_prompt_text=system_p_genesis
+                )
+            elif llm_client_config["type"] == "google_gemini":
+                llm_response_obj = call_gemini_api(
+                    user_p_genesis,
+                    llm_client_config["model_id"],
+                    temperature=llm_client_config.get("temp", 0.7),
+                    max_new_tokens=llm_client_config.get("max_tokens", 768),
+                    system_prompt_text=system_p_genesis
+                )
+            else:
+                solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown LLM client type '{llm_client_config['type']}'")
+                continue # Skip to next attempt
+        except Exception as e_call:
+            # This catch block is crucial if call_..._api functions themselves raise exceptions
+            # before returning an LLMResponse object (though they are designed to return LLMResponse(error=...))
+            error_msg = f"ERROR (Genesis Attempt {i+1} calling LLM {llm_client_config['model_id']}): Exception during API call: {type(e_call).__name__} - {str(e_call)}"
+            print(f"ERROR: core.generation_engine - {error_msg}")
+            solutions_or_errors.append(error_msg)
+            continue
+
+        if llm_response_obj and llm_response_obj.success:
+            solutions_or_errors.append(llm_response_obj.text if llm_response_obj.text is not None else "")
+            print(f"DEBUG: Solution candidate {i+1} generated successfully (Model: {llm_response_obj.model_id_used}).")
+        elif llm_response_obj: # Error occurred and was encapsulated in LLMResponse
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1} with {llm_response_obj.model_id_used}): {llm_response_obj.error}")
+            print(f"DEBUG: Solution candidate {i+1} FAILED with error from LLMResponse (Model: {llm_response_obj.model_id_used}). Error: {llm_response_obj.error}")
+        else: # Should ideally not happen if LLMResponse is always returned from call_..._api
+            solutions_or_errors.append(f"ERROR (Genesis Attempt {i+1}): Unknown error, LLM response object was None.")
+            print(f"CRITICAL_DEBUG: Solution candidate {i+1} - LLM response object was None. This indicates an issue in call_..._api not returning an LLMResponse object.")
+
+    print(f"DEBUG: generate_initial_solutions finished. Returning {len(solutions_or_errors)} items.")
+    return solutions_or_errors
+
+# A print statement at the end of the module definition
+print("DEBUG: core.generation_engine - Module fully defined, including generate_initial_solutions.")