Spaces:
Build error
Build error
import os | |
import tempfile | |
from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult | |
from evaluation.utils.shared import assert_and_raise | |
from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction | |
from openhands.events.event import Event | |
from openhands.events.observation import AgentDelegateObservation | |
from openhands.runtime.base import Runtime | |
# Static page served to the agent during the test. The answer text is not in
# the initial markup: the click handler on #showButton writes it into #result,
# so the agent has to interact with the page rather than just fetch it.
HTML_FILE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>The Ultimate Answer</title>
    <style>
        body {
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
            margin: 0;
            background: linear-gradient(to right, #1e3c72, #2a5298);
            color: #fff;
            font-family: 'Arial', sans-serif;
            text-align: center;
        }
        .container {
            text-align: center;
            padding: 20px;
            background: rgba(255, 255, 255, 0.1);
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
        }
        h1 {
            font-size: 36px;
            margin-bottom: 20px;
        }
        p {
            font-size: 18px;
            margin-bottom: 30px;
        }
        #showButton {
            padding: 10px 20px;
            font-size: 16px;
            color: #1e3c72;
            background: #fff;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            transition: background 0.3s ease;
        }
        #showButton:hover {
            background: #f0f0f0;
        }
        #result {
            margin-top: 20px;
            font-size: 24px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>The Ultimate Answer</h1>
        <p>Click the button to reveal the answer to life, the universe, and everything.</p>
        <button id="showButton">Click me</button>
        <div id="result"></div>
    </div>
    <script>
        document.getElementById('showButton').addEventListener('click', function() {
            document.getElementById('result').innerText = 'The answer is OpenHands is all you need!';
        });
    </script>
</body>
</html>
"""
class Test(BaseIntegrationTest):
    """Integration test: browse a locally served page and report its answer.

    ``initialize_runtime`` places ``HTML_FILE`` in ``/tmp/server`` and starts
    ``python3 -m http.server 8000`` there; ``verify_result`` then looks for
    the answer phrase in the message-like events of the agent's history.
    """

    INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'

    @classmethod
    def initialize_runtime(cls, runtime: Runtime) -> None:
        """Prepare the directories, the page, and the HTTP server.

        Args:
            runtime: Runtime used to run shell commands and receive the file.

        Raises:
            Whatever ``assert_and_raise`` raises when a ``mkdir`` command
            reports a non-zero exit code.
        """
        for directory in ('/workspace', '/tmp/server'):
            action = CmdRunAction(command=f'mkdir -p {directory}')
            obs = runtime.run_action(action)
            assert_and_raise(
                obs.exit_code == 0, f'Failed to run command: {obs.content}'
            )

        # Write the page to a local temporary index.html and hand it to the
        # runtime. The copy must happen inside the `with` block: the temporary
        # directory is deleted as soon as the block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, 'index.html')
            with open(temp_file_path, 'w', encoding='utf-8') as f:
                f.write(HTML_FILE)
            runtime.copy_to(temp_file_path, '/tmp/server')

        # Start the HTTP server in the background on port 8000. The result of
        # this command is intentionally not checked (same as before).
        action = CmdRunAction(
            command='cd /tmp/server && nohup python3 -m http.server 8000 &'
        )
        runtime.run_action(action)

    @classmethod
    def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
        """Check whether any message-like event contains the answer phrase.

        Args:
            runtime: Unused here; part of the method signature.
            histories: Events recorded during the agent run.

        Returns:
            ``TestResult(success=True)`` on the first event whose content
            contains ``'OpenHands is all you need!'``; otherwise a failed
            ``TestResult`` whose reason includes the number of events checked.
        """
        from openhands.core.logger import openhands_logger as logger

        # Only these event types can carry the agent's answer text.
        message_actions = [
            event
            for event in histories
            if isinstance(
                event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
            )
        ]
        logger.debug(f'Total message-like events: {len(message_actions)}')

        for event in message_actions:
            # Best-effort scan: a malformed event is logged and skipped so it
            # cannot hide a correct answer in a later event.
            try:
                if isinstance(event, AgentFinishAction):
                    # Finish actions keep their text under outputs['content'];
                    # treat a missing or None value as empty.
                    content = event.outputs.get('content', '') or ''
                else:
                    # MessageAction / AgentDelegateObservation expose .content.
                    content = event.content
                if 'OpenHands is all you need!' in content:
                    return TestResult(success=True)
            except Exception as e:
                logger.error(f'Error processing event: {e}')

        logger.debug(
            f'Total messages: {len(message_actions)}. Messages: {message_actions}'
        )
        return TestResult(
            success=False,
            reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
        )