mgbam committed on
Commit
e3bf7f8
·
verified ·
1 Parent(s): d706935

Update core/safe_executor.py

Browse files
Files changed (1) hide show
  1. core/safe_executor.py +133 -108
core/safe_executor.py CHANGED
@@ -1,131 +1,156 @@
1
  # algoforge_prime/core/safe_executor.py
2
  # WARNING: THIS IS A SIMULATED SAFE EXECUTOR. DO NOT USE FOR REAL UNTRUSTED CODE.
3
- # A real implementation would require robust sandboxing (Docker, nsjail, Firecracker, WASM, etc.)
4
- # This simulation is purely for demonstrating the application flow.
5
 
6
  import time
7
  import random
8
- import traceback
 
 
 
 
 
 
 
9
 
10
  class ExecutionResult:
11
- def __init__(self, success: bool, output: str = None, error: str = None, execution_time: float = 0.0, passed_tests: int = 0, total_tests: int = 0):
12
- self.success = success # True if code ran without crashing (not necessarily if tests passed)
13
- self.output = output # Stdout from the execution
14
- self.error = error # Stderr or exception message
 
 
 
 
 
 
 
 
15
  self.execution_time = execution_time
16
- self.passed_tests = passed_tests # Number of 'assert' statements that passed
17
- self.total_tests = total_tests # Total 'assert' statements found and attempted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def __str__(self):
20
- if self.success:
21
- test_summary = f"Tests: {self.passed_tests}/{self.total_tests} passed. " if self.total_tests > 0 else ""
22
- return f"Execution OK. {test_summary}Time: {self.execution_time:.4f}s. Output: {str(self.output)[:200]}"
23
- else:
24
- return f"Execution FAILED. Error: {str(self.error)[:300]}. Time: {self.execution_time:.4f}s"
 
 
 
 
 
25
 
26
- def execute_python_code_with_tests(code_string: str, tests_string: str, timeout_seconds: int = 5) -> ExecutionResult:
 
27
  """
28
  SIMULATES sandboxed execution of Python code against a set of assert-based tests.
29
- In a real system, this function would be a complex interface to a secure sandbox.
30
  """
31
- print(f"SIMULATOR: Attempting to 'execute' code with tests. Timeout: {timeout_seconds}s")
 
 
32
  start_time = time.time()
 
 
 
 
 
 
33
 
34
  if not code_string.strip():
35
- return ExecutionResult(success=False, error="No code provided to execute.", execution_time=time.time() - start_time)
36
 
37
- # Basic simulation:
38
- # 1. Check for obvious syntax issues (very naive)
39
- if "def " not in code_string and "class " not in code_string and not any(op in code_string for op in ["=", "+", "-", "*", "/"]):
40
- # If it doesn't look like defining something or doing operations, maybe it's just a malformed snippet
41
- if len(code_string) < 50 and "print" not in code_string : # very arbitrary
42
- return ExecutionResult(success=False, error="Simulated: Code appears incomplete or malformed for execution.", execution_time=time.time()-start_time)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
- # 2. Simulate test parsing and execution
46
  test_lines = [line.strip() for line in tests_string.splitlines() if line.strip().startswith("assert")]
47
- total_tests = len(test_lines)
48
- passed_tests = 0
49
- simulated_stdout = []
50
- simulated_stderr = None
51
-
52
- if total_tests == 0 and tests_string.strip(): # If tests were provided but no asserts found
53
- simulated_stderr = "Simulated: Test string provided, but no 'assert' statements found."
54
- # We can still "run" the code itself without asserts
55
 
56
- # Simulate code "running"
57
- try:
58
- if "loop forever" in code_string.lower() or "while True:" in code_string and "break" not in code_string: # Timeout simulation
59
- time.sleep(timeout_seconds + 0.1) # Exceed timeout
60
- raise TimeoutError("Simulated: Code execution timed out.")
61
-
62
- # Simulate print statements
63
- if "print('hello world')" in code_string:
64
- simulated_stdout.append("hello world")
65
- if "print(1+1)" in code_string:
66
- simulated_stdout.append("2")
67
-
68
- # Simulate errors
69
- if "1/0" in code_string or "zerodivisionerror" in code_string:
70
- raise ZeroDivisionError("Simulated: division by zero")
71
- if "undefined_variable" in code_string:
72
- raise NameError("Simulated: name 'undefined_variable' is not defined")
73
-
74
- # Simulate test passing/failing (very naively)
75
- if total_tests > 0:
76
- for i, test_line in enumerate(test_lines):
77
- # This is extremely basic and not real evaluation.
78
- # A real system would execute each assert in the context of the provided code.
79
- if "==" in test_line:
80
- if "True" in test_line or "120" in test_line or "correct" in test_line.lower(): # Naive pass conditions
81
- if random.random() > 0.1: # 90% chance of passing "good" tests
82
- passed_tests += 1
83
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): PASSED")
84
- else:
85
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): FAILED (Random Sim)")
86
- if not simulated_stderr: simulated_stderr = f"Simulated: Assertion failed on test {i+1}"
87
- elif "False" in test_line or "wrong" in test_line.lower(): # Naive fail conditions
88
- if random.random() > 0.1: # 90% chance of failing "bad" tests
89
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): FAILED (Expected by Sim)")
90
- if not simulated_stderr: simulated_stderr = f"Simulated: Assertion failed on test {i+1} (Expected by Sim)"
91
- else: # 10% chance of unexpectedly passing
92
- passed_tests +=1
93
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): PASSED (Unexpected by Sim)")
94
- else: # Other asserts
95
- if random.random() > 0.5: # 50/50
96
- passed_tests += 1
97
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): PASSED (Random Sim)")
98
- else:
99
- simulated_stdout.append(f"Simulated Test {i+1} ({test_line[:30]}...): FAILED (Random Sim)")
100
- if not simulated_stderr: simulated_stderr = f"Simulated: Assertion failed on test {i+1} (Random Sim)"
101
- else: # Non-equality asserts, just pass some randomly
102
- if random.random() > 0.3: passed_tests +=1
103
 
104
- # If no explicit error, but not all tests passed
105
- if total_tests > 0 and passed_tests < total_tests and not simulated_stderr:
106
- simulated_stderr = f"Simulated: {total_tests - passed_tests} out of {total_tests} assertions failed."
107
-
108
- execution_time = time.time() - start_time
109
- if execution_time > timeout_seconds: # Check after simulated work
110
- raise TimeoutError("Simulated: Code execution exceeded timeout.")
111
-
112
- return ExecutionResult(
113
- success=True if not simulated_stderr or (total_tests > 0 and passed_tests == total_tests) else False, # Success if no errors or all tests pass
114
- output="\n".join(simulated_stdout) if simulated_stdout else None,
115
- error=simulated_stderr,
116
- execution_time=execution_time,
117
- passed_tests=passed_tests,
118
- total_tests=total_tests
119
- )
120
-
121
- except Exception as e:
122
- execution_time = time.time() - start_time
123
- tb_str = traceback.format_exc()
124
- print(f"SIMULATOR EXCEPTION: {e}\n{tb_str}")
125
- return ExecutionResult(
126
- success=False,
127
- error=f"Simulated Exception: {type(e).__name__} - {str(e)}",
128
- execution_time=execution_time,
129
- passed_tests=passed_tests, # Could be some if error happened mid-tests
130
- total_tests=total_tests
131
- )
 
1
  # algoforge_prime/core/safe_executor.py
2
  # WARNING: THIS IS A SIMULATED SAFE EXECUTOR. DO NOT USE FOR REAL UNTRUSTED CODE.
3
+ # A real implementation would require robust sandboxing.
4
+ # This simulation is purely for demonstrating the application flow and data structures.
5
 
6
  import time
7
  import random
8
+ import traceback # For capturing simulated exception details
9
+
10
class TestResult:
    """Outcome of one simulated assert-based test.

    Attributes:
        test_string: The original ``assert`` statement that was "executed".
        passed: Whether the simulated assertion held.
        error_message: Simulated AssertionError text when the test failed,
            otherwise None.
        output: Any stdout captured specifically for this test (advanced;
            not populated by the current simulator).
    """

    # NOTE: annotations are quoted so they stay valid on any Python >= 3.8
    # and correctly mark the None defaults as optional.
    def __init__(self, test_string: str, passed: bool,
                 error_message: "str | None" = None,
                 output: "str | None" = None):
        self.test_string = test_string
        self.passed = passed
        self.error_message = error_message
        self.output = output

    def __repr__(self):
        # Compact debug form; truncate the assert text to keep logs readable.
        status = "PASSED" if self.passed else "FAILED"
        return f"TestResult({self.test_string[:40]!r}: {status})"
16
 
17
class ExecutionResult:
    """Aggregate result of one simulated sandbox run.

    Attributes:
        success: Did the overall code snippet run without crashing the sandbox?
        stdout: Captured standard output ("" when none).
        stderr: General standard error, not specific test failures ("" when none).
        execution_time: Simulated wall-clock time in seconds.
        individual_test_results: One TestResult per attempted assert.
        compilation_error: Message if the code could not even compile/parse.
        timeout_error: True if the (simulated) execution timed out.
    """

    def __init__(self,
                 success: bool,
                 stdout: "str | None" = None,
                 stderr: "str | None" = None,
                 execution_time: float = 0.0,
                 individual_test_results: "list[TestResult] | None" = None,
                 compilation_error: "str | None" = None,
                 timeout_error: bool = False):
        self.success = success
        # Normalize None to "" / [] so consumers never have to None-check.
        self.stdout = stdout if stdout else ""
        self.stderr = stderr if stderr else ""
        self.execution_time = execution_time
        self.individual_test_results = individual_test_results if individual_test_results else []
        self.compilation_error = compilation_error
        self.timeout_error = timeout_error

    @property
    def passed_tests(self) -> int:
        """Number of individual tests that passed."""
        return sum(1 for tr in self.individual_test_results if tr.passed)

    @property
    def total_tests(self) -> int:
        """Total number of individual tests attempted."""
        return len(self.individual_test_results)

    @property
    def overall_error_summary(self) -> "str | None":
        """Most informative single error string, or None when nothing failed.

        Priority: compilation error > timeout > general runtime stderr (when no
        tests ran) > failed-test details > leftover stderr.
        """
        if self.compilation_error:
            return f"Compilation Error: {self.compilation_error}"
        if self.timeout_error:
            return "Execution Timed Out."
        # General stderr is only the headline when no per-test detail exists.
        if self.stderr and not self.individual_test_results:
            return f"Runtime Error: {self.stderr}"
        failed_tests = [tr for tr in self.individual_test_results if not tr.passed and tr.error_message]
        if failed_tests:
            # Show at most the first two failures to keep the summary short.
            return f"Failed Tests: {'; '.join([f'`{ft.test_string[:50]}...` ({ft.error_message[:50]}...)' for ft in failed_tests[:2]])}"
        if self.stderr:
            return f"General Stderr: {self.stderr}"
        return None

    def __str__(self):
        # Same status strings as before, but as a readable if/elif ladder
        # instead of nested ternaries.
        if self.success and self.total_tests > 0 and self.passed_tests == self.total_tests:
            status = "SUCCESS (All tests passed)"
        elif self.success and self.total_tests > 0 and self.passed_tests > 0:
            status = "PARTIAL SUCCESS (Some tests passed)"
        elif self.success:
            status = "SUCCESS (No tests run or all passed if no asserts)"
        else:
            status = "FAILED"

        error_info = self.overall_error_summary
        test_summary = f"Tests: {self.passed_tests}/{self.total_tests} passed." if self.total_tests > 0 else "No assert-based tests run."

        return (f"Execution {status}. {test_summary} Time: {self.execution_time:.4f}s. "
                f"{('Error: ' + error_info) if error_info else ''} "
                f"{('Stdout: ' + self.stdout[:100] + '...') if self.stdout else ''}")
66
 
67
+
68
def execute_python_code_with_tests(code_string: str, tests_string: str, timeout_seconds: int = 7) -> ExecutionResult:
    """
    SIMULATES sandboxed execution of Python code against a set of assert-based tests.

    WARNING: nothing is actually executed. Outcomes are guessed from keyword
    matching on ``code_string``/``tests_string``, with some random branches.
    A real implementation must replace this with a genuine sandbox.

    Args:
        code_string: Candidate Python source to "run".
        tests_string: Text containing ``assert`` statements, one per line.
        timeout_seconds: Simulated timeout budget; also caps the reported time.

    Returns:
        ExecutionResult summarizing the simulated run (may return early for
        empty input, simulated syntax errors, timeouts, or runtime errors).
    """
    print(f"SIMULATOR: Attempting to 'execute' code. Timeout: {timeout_seconds}s")
    print(f" Code (first 100 chars): {code_string[:100].strip()}...")
    print(f" Tests (first 100 chars): {tests_string[:100].strip()}...")
    start_time = time.time()

    individual_results = []     # one TestResult per attempted assert
    sim_stdout_lines = []       # accumulated simulated stdout
    sim_stderr_lines = []       # accumulated simulated stderr
    sim_compilation_error = None
    sim_timeout = False

    # Empty input counts as a compilation failure, not a successful no-op.
    if not code_string.strip():
        return ExecutionResult(success=False, compilation_error="No code provided.", execution_time=time.time() - start_time)

    # Simulate compilation/syntax check (very naive)
    if "def foo bar" in code_string or "syntax error" in code_string.lower(): # Bad syntax example
        sim_compilation_error = "Simulated: Invalid syntax detected."
        return ExecutionResult(success=False, compilation_error=sim_compilation_error, execution_time=time.time()-start_time)

    # Simulate timeout
    # Heuristic: an unbounded `while True:` with no break/sleep is treated as a hang.
    if "while True:" in code_string and "break" not in code_string and "sleep" not in code_string:
        sim_timeout = True
        # time.sleep(timeout_seconds + 0.1) # Actual sleep for simulation
        return ExecutionResult(success=False, timeout_error=True, execution_time=min(timeout_seconds, time.time() - start_time), individual_test_results=individual_results)

    # Simulate runtime errors not caught by tests
    if "1/0" in code_string or "zerodivisionerror" in code_string.lower():
        sim_stderr_lines.append("Traceback (most recent call last):\n File \"<string>\", line X, in <module>\nZeroDivisionError: division by zero")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time()-start_time, individual_test_results=individual_results)
    if "undefined_variable_xyz" in code_string:
        sim_stderr_lines.append("Traceback (most recent call last):\n File \"<string>\", line Y, in <module>\nNameError: name 'undefined_variable_xyz' is not defined")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time()-start_time, individual_test_results=individual_results)

    # Simulate print statements in the main code
    if "print('Setup complete')" in code_string:
        sim_stdout_lines.append("Setup complete")

    # Only lines that literally start with "assert" are treated as tests.
    test_lines = [line.strip() for line in tests_string.splitlines() if line.strip().startswith("assert")]

    for test_str in test_lines:
        passed_this_test = True
        error_this_test = None
        # Super naive simulation based on keywords in the test or code
        # This needs to be replaced by actual execution in a sandbox.
        if "None" in test_str and "TypeError" in code_string and "raise TypeError" in code_string:
            if "reverse_string(None)" in test_str: # Specific to the example
                passed_this_test = True # Simulating a try-except test for TypeError
                sim_stdout_lines.append(f"SimTest '{test_str[:30]}...': PASSED (TypeError correctly raised and caught - simulated)")
            else: # General "None" test might fail if code doesn't handle it right
                passed_this_test = random.choice([True, False, False]) # Make None checks less likely to pass randomly
        elif "==" in test_str:
            # NOTE(review): split("==") keeps only the first two pieces in use;
            # a chained comparison like `a == b == c` would mis-parse — confirm
            # callers never produce such asserts.
            parts = test_str.split("==")
            actual_call_sim = parts[0].replace("assert", "").strip()
            expected_sim = parts[1].strip()

            # Simulate based on a few known patterns from the example
            if "reverse_string(\"hello\")" == actual_call_sim and "\"olleh\"" == expected_sim:
                passed_this_test = True if "s[::-1]" in code_string or "char + reversed_string" in code_string else False
            elif "reverse_string(\"\")" == actual_call_sim and "\"\"" == expected_sim:
                passed_this_test = True # Empty string case usually handled
            elif "reverse_string(123)" == actual_call_sim and "\"321\"" == expected_sim:
                passed_this_test = True if "str(s)" in code_string and ("s[::-1]" in code_string or "char + reversed_string" in code_string) else False
            else: # Randomize other equality tests
                passed_this_test = random.random() > 0.3 # 70% chance of passing random asserts

        if not passed_this_test:
            error_this_test = f"Simulated AssertionError: {test_str} evaluated to False"
            sim_stderr_lines.append(f"FAIL: {test_str}\n {error_this_test}")

        individual_results.append(TestResult(test_string=test_str, passed=passed_this_test, error_message=error_this_test))

    # Success = no compile error, no timeout, every test passed, and no bare
    # stderr when zero tests ran. (sim_timeout/sim_compilation_error are always
    # falsy here because those paths returned early above.)
    final_success = not sim_compilation_error and not sim_timeout and not any(not tr.passed for tr in individual_results) and not (sim_stderr_lines and not individual_results)

    return ExecutionResult(
        success=final_success,
        stdout="\n".join(sim_stdout_lines) if sim_stdout_lines else None,
        stderr="\n".join(sim_stderr_lines) if sim_stderr_lines else None,
        execution_time=min(time.time() - start_time, timeout_seconds), # Cap time for simulation
        individual_test_results=individual_results,
        compilation_error=sim_compilation_error,
        timeout_error=sim_timeout
    )
155
# Import-time breadcrumb confirming the simulated executor module was loaded.
_LOAD_MESSAGE = "DEBUG: core.safe_executor (SIMULATED) - Module defined."
print(_LOAD_MESSAGE)