# algoforge_prime/core/safe_executor.py
# WARNING: THIS IS A SIMULATED SAFE EXECUTOR. DO NOT USE FOR REAL UNTRUSTED CODE.
# A real implementation would require robust sandboxing.
# This simulation is purely for demonstrating the application flow and data structures.
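# For reference, a real executor might at minimum run the snippet in an isolated
# subprocess (illustrative sketch only -- a bare subprocess is NOT a sandbox;
# filesystem, network, and resource limits would still require containers,
# seccomp, or similar):
#
#   import subprocess, sys
#   completed = subprocess.run(
#       [sys.executable, "-I", "-c", code_string + "\n" + tests_string],
#       capture_output=True, text=True, timeout=timeout_seconds,
#   )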
import time
import random
import traceback  # For capturing simulated exception details


class TestResult:
    def __init__(self, test_string: str, passed: bool, error_message: str = None, output: str = None):
        self.test_string = test_string  # The original assert statement
        self.passed = passed
        self.error_message = error_message  # e.g., the AssertionError message
        self.output = output  # Any stdout captured specifically for this test (advanced)


class ExecutionResult:
    def __init__(self,
                 success: bool,  # Did the overall code snippet run without crashing the sandbox?
                 stdout: str = None,
                 stderr: str = None,  # General stderr, not specific test failures
                 execution_time: float = 0.0,
                 individual_test_results: list[TestResult] = None,  # List of TestResult objects
                 compilation_error: str = None,  # Set if the code couldn't even compile/parse
                 timeout_error: bool = False
                 ):
        self.success = success
        self.stdout = stdout if stdout else ""
        self.stderr = stderr if stderr else ""
        self.execution_time = execution_time
        self.individual_test_results = individual_test_results if individual_test_results else []
        self.compilation_error = compilation_error
        self.timeout_error = timeout_error

    def passed_tests(self) -> int:
        return sum(1 for tr in self.individual_test_results if tr.passed)

    def total_tests(self) -> int:
        return len(self.individual_test_results)

    def overall_error_summary(self) -> str:
        if self.compilation_error:
            return f"Compilation Error: {self.compilation_error}"
        if self.timeout_error:
            return "Execution Timed Out."
        if self.stderr and not self.individual_test_results:
            return f"Runtime Error: {self.stderr}"  # General stderr if there are no per-test results yet
        # If there are test results, specific test errors are more informative
        failed_tests = [tr for tr in self.individual_test_results if not tr.passed and tr.error_message]
        if failed_tests:
            # Show only the first two failures, truncated for readability
            return f"Failed Tests: {'; '.join([f'`{ft.test_string[:50]}...` ({ft.error_message[:50]}...)' for ft in failed_tests[:2]])}"
        if self.stderr:
            return f"General Stderr: {self.stderr}"  # Tests passed, but something was still written to stderr
        return None  # No obvious top-level error

    def __str__(self):
        total = self.total_tests()
        passed = self.passed_tests()
        if self.success and total > 0 and passed == total:
            status = "SUCCESS (All tests passed)"
        elif self.success and total > 0 and passed > 0:
            status = "PARTIAL SUCCESS (Some tests passed)"
        elif self.success:
            status = "SUCCESS (No assert-based tests were run)"
        else:
            status = "FAILED"
        error_info = self.overall_error_summary()
        test_summary = f"Tests: {passed}/{total} passed." if total > 0 else "No assert-based tests run."
        return (f"Execution {status}. {test_summary} Time: {self.execution_time:.4f}s. "
                f"{('Error: ' + error_info) if error_info else ''} "
                f"{('Stdout: ' + self.stdout[:100] + '...') if self.stdout else ''}")


def execute_python_code_with_tests(code_string: str, tests_string: str, timeout_seconds: int = 7) -> ExecutionResult:
    """
    SIMULATES sandboxed execution of Python code against a set of assert-based tests.
    """
    print(f"SIMULATOR: Attempting to 'execute' code. Timeout: {timeout_seconds}s")
    print(f"  Code (first 100 chars): {code_string[:100].strip()}...")
    print(f"  Tests (first 100 chars): {tests_string[:100].strip()}...")
    start_time = time.time()
    individual_results = []
    sim_stdout_lines = []
    sim_stderr_lines = []
    sim_compilation_error = None
    sim_timeout = False

    if not code_string.strip():
        return ExecutionResult(success=False, compilation_error="No code provided.", execution_time=time.time() - start_time)

    # Simulate a compilation/syntax check (very naive)
    if "def foo bar" in code_string or "syntax error" in code_string.lower():  # Example of bad syntax
        sim_compilation_error = "Simulated: Invalid syntax detected."
        return ExecutionResult(success=False, compilation_error=sim_compilation_error, execution_time=time.time() - start_time)

    # Simulate a timeout
    if "while True:" in code_string and "break" not in code_string and "sleep" not in code_string:
        sim_timeout = True
        # time.sleep(timeout_seconds + 0.1)  # Uncomment to actually sleep for the simulation
        return ExecutionResult(success=False, timeout_error=True, execution_time=min(timeout_seconds, time.time() - start_time), individual_test_results=individual_results)

    # Simulate runtime errors not caught by tests
    if "1/0" in code_string or "zerodivisionerror" in code_string.lower():
        sim_stderr_lines.append("Traceback (most recent call last):\n  File \"<string>\", line X, in <module>\nZeroDivisionError: division by zero")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time() - start_time, individual_test_results=individual_results)
    if "undefined_variable_xyz" in code_string:
        sim_stderr_lines.append("Traceback (most recent call last):\n  File \"<string>\", line Y, in <module>\nNameError: name 'undefined_variable_xyz' is not defined")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time() - start_time, individual_test_results=individual_results)

    # Simulate print statements in the main code
    if "print('Setup complete')" in code_string:
        sim_stdout_lines.append("Setup complete")

    test_lines = [line.strip() for line in tests_string.splitlines() if line.strip().startswith("assert")]
    for test_str in test_lines:
        passed_this_test = True
        error_this_test = None
        # Extremely naive simulation based on keywords in the test or the code.
        # This must be replaced by actual execution in a sandbox.
        if "None" in test_str and "TypeError" in code_string and "raise TypeError" in code_string:
            if "reverse_string(None)" in test_str:  # Specific to the example
                passed_this_test = True  # Simulating a try-except test for TypeError
                sim_stdout_lines.append(f"SimTest '{test_str[:30]}...': PASSED (TypeError correctly raised and caught - simulated)")
            else:  # A general "None" test might fail if the code doesn't handle it
                passed_this_test = random.choice([True, False, False])  # Make None checks less likely to pass randomly
        elif "==" in test_str:
            parts = test_str.split("==")
            actual_call_sim = parts[0].replace("assert", "").strip()
            expected_sim = parts[1].strip()
            # Simulate based on a few known patterns from the example
            if actual_call_sim == 'reverse_string("hello")' and expected_sim == '"olleh"':
                passed_this_test = "s[::-1]" in code_string or "char + reversed_string" in code_string
            elif actual_call_sim == 'reverse_string("")' and expected_sim == '""':
                passed_this_test = True  # The empty-string case is usually handled
            elif actual_call_sim == 'reverse_string(123)' and expected_sim == '"321"':
                passed_this_test = "str(s)" in code_string and ("s[::-1]" in code_string or "char + reversed_string" in code_string)
            else:  # Randomize other equality tests
                passed_this_test = random.random() > 0.3  # 70% chance of passing arbitrary asserts
        if not passed_this_test:
            error_this_test = f"Simulated AssertionError: {test_str} evaluated to False"
            sim_stderr_lines.append(f"FAIL: {test_str}\n  {error_this_test}")
        individual_results.append(TestResult(test_string=test_str, passed=passed_this_test, error_message=error_this_test))

    final_success = (
        not sim_compilation_error
        and not sim_timeout
        and all(tr.passed for tr in individual_results)
        and not (sim_stderr_lines and not individual_results)
    )
    return ExecutionResult(
        success=final_success,
        stdout="\n".join(sim_stdout_lines) if sim_stdout_lines else None,
        stderr="\n".join(sim_stderr_lines) if sim_stderr_lines else None,
        execution_time=min(time.time() - start_time, timeout_seconds),  # Cap the reported time for the simulation
        individual_test_results=individual_results,
        compilation_error=sim_compilation_error,
        timeout_error=sim_timeout
    )
print("DEBUG: core.safe_executor (SIMULATED) - Module defined.") |