File size: 9,078 Bytes
3d7ae13
 
e3bf7f8
 
3d7ae13
 
 
e3bf7f8
 
 
 
 
 
 
 
3d7ae13
 
e3bf7f8
 
 
 
 
 
 
 
 
 
 
 
3d7ae13
e3bf7f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d7ae13
 
e3bf7f8
 
 
 
 
 
 
 
 
 
3d7ae13
e3bf7f8
 
3d7ae13
 
 
e3bf7f8
 
 
3d7ae13
e3bf7f8
 
 
 
 
 
3d7ae13
 
e3bf7f8
3d7ae13
e3bf7f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d7ae13
 
 
 
e3bf7f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d7ae13
e3bf7f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# algoforge_prime/core/safe_executor.py
# WARNING: THIS IS A SIMULATED SAFE EXECUTOR. DO NOT USE FOR REAL UNTRUSTED CODE.
# A real implementation would require robust sandboxing.
# This simulation is purely for demonstrating the application flow and data structures.

from __future__ import annotations

import random
import time
import traceback  # For capturing simulated exception details

class TestResult:
    """Outcome of a single simulated assert-based test.

    Attributes mirror what a real sandboxed runner would report for one
    `assert` line taken from the supplied test string.
    """

    def __init__(self, test_string: str, passed: bool, error_message: str | None = None, output: str | None = None):
        self.test_string = test_string      # The original assert statement, verbatim
        self.passed = passed                # Whether the assert was judged to pass
        self.error_message = error_message  # e.g. AssertionError message; None when passed
        self.output = output                # Any stdout captured specifically for this test (advanced)

class ExecutionResult:
    """Aggregate result of one simulated sandboxed execution run.

    Bundles overall success, captured stdout/stderr, timing, and the list of
    per-assert TestResult objects, plus convenience summaries over them.
    """

    def __init__(self,
                 success: bool,  # Did the overall code snippet run without crashing the sandbox?
                 stdout: str | None = None,
                 stderr: str | None = None,  # General stderr, not specific test failures
                 execution_time: float = 0.0,
                 individual_test_results: list[TestResult] | None = None,  # List of TestResult objects
                 compilation_error: str | None = None,  # Set if code couldn't even compile/parse
                 timeout_error: bool = False
                ):
        self.success = success
        # Normalize None to "" / [] so downstream string and list operations are safe.
        self.stdout = stdout if stdout else ""
        self.stderr = stderr if stderr else ""
        self.execution_time = execution_time
        self.individual_test_results = individual_test_results if individual_test_results else []
        self.compilation_error = compilation_error
        self.timeout_error = timeout_error

    @property
    def passed_tests(self) -> int:
        """Number of individual tests that passed."""
        return sum(1 for tr in self.individual_test_results if tr.passed)

    @property
    def total_tests(self) -> int:
        """Total number of individual tests recorded."""
        return len(self.individual_test_results)

    @property
    def overall_error_summary(self) -> str | None:
        """Single most informative error description, or None when no obvious error.

        Priority: compilation error > timeout > general stderr (only when no
        per-test results exist) > failed-test details > leftover stderr.
        """
        if self.compilation_error: return f"Compilation Error: {self.compilation_error}"
        if self.timeout_error: return "Execution Timed Out."
        if self.stderr and not self.individual_test_results: return f"Runtime Error: {self.stderr}" # General stderr if no specific test failures yet
        # If there are test results, specific test errors are more informative
        failed_tests = [tr for tr in self.individual_test_results if not tr.passed and tr.error_message]
        if failed_tests:
            return f"Failed Tests: {'; '.join([f'`{ft.test_string[:50]}...` ({ft.error_message[:50]}...)' for ft in failed_tests[:2]])}" # Show first 2
        if self.stderr: return f"General Stderr: {self.stderr}" # If tests passed but still stderr
        return None # No obvious top-level error

    def __str__(self):
        # Status string distinguishes full pass / partial pass / no-tests success / failure.
        status = "SUCCESS (All tests passed)" if self.success and self.total_tests > 0 and self.passed_tests == self.total_tests else \
                 ("PARTIAL SUCCESS (Some tests passed)" if self.success and self.total_tests > 0 and self.passed_tests > 0 else \
                  ("SUCCESS (No tests run or all passed if no asserts)" if self.success else "FAILED"))

        error_info = self.overall_error_summary
        test_summary = f"Tests: {self.passed_tests}/{self.total_tests} passed." if self.total_tests > 0 else "No assert-based tests run."

        return (f"Execution {status}. {test_summary} Time: {self.execution_time:.4f}s. "
                f"{('Error: ' + error_info) if error_info else ''} "
                f"{('Stdout: ' + self.stdout[:100] + '...') if self.stdout else ''}")


def execute_python_code_with_tests(code_string: str, tests_string: str, timeout_seconds: int = 7) -> ExecutionResult:
    """
    SIMULATES sandboxed execution of Python code against a set of assert-based tests.

    The "execution" is entirely keyword-driven: specific substrings in
    `code_string` trigger simulated compile errors, timeouts, runtime errors,
    or stdout, and each `assert` line in `tests_string` is judged by naive
    pattern matching (with some randomized outcomes for unknown asserts).

    :param code_string: Candidate solution source to pretend-execute.
    :param tests_string: Newline-separated tests; only lines starting with
        "assert" are considered.
    :param timeout_seconds: Simulated wall-clock cap; also caps reported time.
    :return: ExecutionResult describing the simulated run.
    """
    print(f"SIMULATOR: Attempting to 'execute' code. Timeout: {timeout_seconds}s")
    print(f"  Code (first 100 chars): {code_string[:100].strip()}...")
    print(f"  Tests (first 100 chars): {tests_string[:100].strip()}...")
    start_time = time.time()

    individual_results = []
    sim_stdout_lines = []
    sim_stderr_lines = []
    sim_compilation_error = None
    sim_timeout = False

    # Empty submission: report as a compilation-level failure.
    if not code_string.strip():
        return ExecutionResult(success=False, compilation_error="No code provided.", execution_time=time.time() - start_time)

    # Simulate compilation/syntax check (very naive keyword triggers).
    if "def foo bar" in code_string or "syntax error" in code_string.lower(): # Bad syntax example
        sim_compilation_error = "Simulated: Invalid syntax detected."
        return ExecutionResult(success=False, compilation_error=sim_compilation_error, execution_time=time.time()-start_time)

    # Simulate timeout: an unbounded loop with no break/sleep is treated as hung.
    if "while True:" in code_string and "break" not in code_string and "sleep" not in code_string:
        sim_timeout = True
        # time.sleep(timeout_seconds + 0.1) # Actual sleep for simulation
        return ExecutionResult(success=False, timeout_error=True, execution_time=min(timeout_seconds, time.time() - start_time), individual_test_results=individual_results)

    # Simulate runtime errors not caught by tests.
    if "1/0" in code_string or "zerodivisionerror" in code_string.lower():
        sim_stderr_lines.append("Traceback (most recent call last):\n  File \"<string>\", line X, in <module>\nZeroDivisionError: division by zero")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time()-start_time, individual_test_results=individual_results)
    if "undefined_variable_xyz" in code_string:
        sim_stderr_lines.append("Traceback (most recent call last):\n  File \"<string>\", line Y, in <module>\nNameError: name 'undefined_variable_xyz' is not defined")
        return ExecutionResult(success=False, stderr="\n".join(sim_stderr_lines), execution_time=time.time()-start_time, individual_test_results=individual_results)

    # Simulate print statements in the main code.
    if "print('Setup complete')" in code_string:
        sim_stdout_lines.append("Setup complete")

    # Only assert-prefixed lines count as tests.
    test_lines = [line.strip() for line in tests_string.splitlines() if line.strip().startswith("assert")]

    for test_str in test_lines:
        passed_this_test = True
        error_this_test = None
        # Super naive simulation based on keywords in the test or code.
        # This needs to be replaced by actual execution in a sandbox.
        if "None" in test_str and "TypeError" in code_string and "raise TypeError" in code_string:
            if "reverse_string(None)" in test_str: # Specific to the example
                passed_this_test = True # Simulating a try-except test for TypeError
                sim_stdout_lines.append(f"SimTest '{test_str[:30]}...': PASSED (TypeError correctly raised and caught - simulated)")
            else: # General "None" test might fail if code doesn't handle it right
                passed_this_test = random.choice([True, False, False]) # Make None checks less likely to pass randomly
        elif "==" in test_str:
            # Split only on the FIRST "==" so expressions containing further
            # "==" tokens keep their full right-hand side intact.
            parts = test_str.split("==", 1)
            # removeprefix strips only the leading "assert" keyword, never the
            # substring "assert" inside an identifier in the expression.
            actual_call_sim = parts[0].removeprefix("assert").strip()
            expected_sim = parts[1].strip()

            # Simulate based on a few known patterns from the example.
            if "reverse_string(\"hello\")" == actual_call_sim and "\"olleh\"" == expected_sim:
                passed_this_test = True if "s[::-1]" in code_string or "char + reversed_string" in code_string else False
            elif "reverse_string(\"\")" == actual_call_sim and "\"\"" == expected_sim:
                passed_this_test = True # Empty string case usually handled
            elif "reverse_string(123)" == actual_call_sim and "\"321\"" == expected_sim:
                passed_this_test = True if "str(s)" in code_string and ("s[::-1]" in code_string or "char + reversed_string" in code_string) else False
            else: # Randomize other equality tests
                passed_this_test = random.random() > 0.3 # 70% chance of passing random asserts

        if not passed_this_test:
            error_this_test = f"Simulated AssertionError: {test_str} evaluated to False"
            sim_stderr_lines.append(f"FAIL: {test_str}\n  {error_this_test}")

        individual_results.append(TestResult(test_string=test_str, passed=passed_this_test, error_message=error_this_test))

    # Success requires: no compile error, no timeout, every recorded test
    # passed, and no stderr-without-tests situation.
    final_success = not sim_compilation_error and not sim_timeout and not any(not tr.passed for tr in individual_results) and not (sim_stderr_lines and not individual_results)

    return ExecutionResult(
        success=final_success,
        stdout="\n".join(sim_stdout_lines) if sim_stdout_lines else None,
        stderr="\n".join(sim_stderr_lines) if sim_stderr_lines else None,
        execution_time=min(time.time() - start_time, timeout_seconds), # Cap time for simulation
        individual_test_results=individual_results,
        compilation_error=sim_compilation_error,
        timeout_error=sim_timeout
    )

# Import-time breadcrumb confirming this (simulated) executor module was loaded.
print("DEBUG: core.safe_executor (SIMULATED) - Module defined.")