Spaces:
Build error
Build error
import os | |
import re | |
import time | |
import traceback | |
import uuid | |
from enum import Enum | |
from typing import Any | |
import bashlex | |
import libtmux | |
from openhands.core.logger import openhands_logger as logger | |
from openhands.events.action import CmdRunAction | |
from openhands.events.observation import ErrorObservation | |
from openhands.events.observation.commands import ( | |
CMD_OUTPUT_PS1_END, | |
CmdOutputMetadata, | |
CmdOutputObservation, | |
) | |
from openhands.runtime.utils.bash_constants import TIMEOUT_MESSAGE_TEMPLATE | |
from openhands.utils.shutdown_listener import should_continue | |
def split_bash_commands(commands: str) -> list[str]: | |
if not commands.strip(): | |
return [''] | |
try: | |
parsed = bashlex.parse(commands) | |
except ( | |
bashlex.errors.ParsingError, | |
NotImplementedError, | |
TypeError, | |
AttributeError, | |
): | |
# Added AttributeError to catch 'str' object has no attribute 'kind' error (issue #8369) | |
logger.debug( | |
f'Failed to parse bash commands\n' | |
f'[input]: {commands}\n' | |
f'[warning]: {traceback.format_exc()}\n' | |
f'The original command will be returned as is.' | |
) | |
# If parsing fails, return the original commands | |
return [commands] | |
result: list[str] = [] | |
last_end = 0 | |
for node in parsed: | |
start, end = node.pos | |
# Include any text between the last command and this one | |
if start > last_end: | |
between = commands[last_end:start] | |
logger.debug(f'BASH PARSING between: {between}') | |
if result: | |
result[-1] += between.rstrip() | |
elif between.strip(): | |
# THIS SHOULD NOT HAPPEN | |
result.append(between.rstrip()) | |
# Extract the command, preserving original formatting | |
command = commands[start:end].rstrip() | |
logger.debug(f'BASH PARSING command: {command}') | |
result.append(command) | |
last_end = end | |
# Add any remaining text after the last command to the last command | |
remaining = commands[last_end:].rstrip() | |
logger.debug(f'BASH PARSING remaining: {remaining}') | |
if last_end < len(commands) and result: | |
result[-1] += remaining | |
logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}') | |
elif last_end < len(commands): | |
if remaining: | |
result.append(remaining) | |
logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}') | |
return result | |
def escape_bash_special_chars(command: str) -> str: | |
r""" | |
Escapes characters that have different interpretations in bash vs python. | |
Specifically handles escape sequences like \;, \|, \&, etc. | |
""" | |
if command.strip() == '': | |
return '' | |
try: | |
parts = [] | |
last_pos = 0 | |
def visit_node(node: Any) -> None: | |
nonlocal last_pos | |
if ( | |
node.kind == 'redirect' | |
and hasattr(node, 'heredoc') | |
and node.heredoc is not None | |
): | |
# We're entering a heredoc - preserve everything as-is until we see EOF | |
# Store the heredoc end marker (usually 'EOF' but could be different) | |
between = command[last_pos : node.pos[0]] | |
parts.append(between) | |
# Add the heredoc start marker | |
parts.append(command[node.pos[0] : node.heredoc.pos[0]]) | |
# Add the heredoc content as-is | |
parts.append(command[node.heredoc.pos[0] : node.heredoc.pos[1]]) | |
last_pos = node.pos[1] | |
return | |
if node.kind == 'word': | |
# Get the raw text between the last position and current word | |
between = command[last_pos : node.pos[0]] | |
word_text = command[node.pos[0] : node.pos[1]] | |
# Add the between text, escaping special characters | |
between = re.sub(r'\\([;&|><])', r'\\\\\1', between) | |
parts.append(between) | |
# Check if word_text is a quoted string or command substitution | |
if ( | |
(word_text.startswith('"') and word_text.endswith('"')) | |
or (word_text.startswith("'") and word_text.endswith("'")) | |
or (word_text.startswith('$(') and word_text.endswith(')')) | |
or (word_text.startswith('`') and word_text.endswith('`')) | |
): | |
# Preserve quoted strings, command substitutions, and heredoc content as-is | |
parts.append(word_text) | |
else: | |
# Escape special chars in unquoted text | |
word_text = re.sub(r'\\([;&|><])', r'\\\\\1', word_text) | |
parts.append(word_text) | |
last_pos = node.pos[1] | |
return | |
# Visit child nodes | |
if hasattr(node, 'parts'): | |
for part in node.parts: | |
visit_node(part) | |
# Process all nodes in the AST | |
nodes = list(bashlex.parse(command)) | |
for node in nodes: | |
between = command[last_pos : node.pos[0]] | |
between = re.sub(r'\\([;&|><])', r'\\\\\1', between) | |
parts.append(between) | |
last_pos = node.pos[0] | |
visit_node(node) | |
# Handle any remaining text after the last word | |
remaining = command[last_pos:] | |
parts.append(remaining) | |
return ''.join(parts) | |
except (bashlex.errors.ParsingError, NotImplementedError, TypeError): | |
logger.debug( | |
f'Failed to parse bash commands for special characters escape\n' | |
f'[input]: {command}\n' | |
f'[warning]: {traceback.format_exc()}\n' | |
f'The original command will be returned as is.' | |
) | |
return command | |
class BashCommandStatus(Enum): | |
CONTINUE = 'continue' | |
COMPLETED = 'completed' | |
NO_CHANGE_TIMEOUT = 'no_change_timeout' | |
HARD_TIMEOUT = 'hard_timeout' | |
def _remove_command_prefix(command_output: str, command: str) -> str: | |
return command_output.lstrip().removeprefix(command.lstrip()).lstrip() | |
class BashSession: | |
POLL_INTERVAL = 0.5 | |
HISTORY_LIMIT = 10_000 | |
PS1 = CmdOutputMetadata.to_ps1_prompt() | |
def __init__( | |
self, | |
work_dir: str, | |
username: str | None = None, | |
no_change_timeout_seconds: int = 30, | |
max_memory_mb: int | None = None, | |
): | |
self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds | |
self.work_dir = work_dir | |
self.username = username | |
self._initialized = False | |
self.max_memory_mb = max_memory_mb | |
def initialize(self) -> None: | |
self.server = libtmux.Server() | |
_shell_command = '/bin/bash' | |
if self.username in ['root', 'openhands']: | |
# This starts a non-login (new) shell for the given user | |
_shell_command = f'su {self.username} -' | |
# FIXME: we will introduce memory limit using sysbox-runc in coming PR | |
# # otherwise, we are running as the CURRENT USER (e.g., when running LocalRuntime) | |
# if self.max_memory_mb is not None: | |
# window_command = ( | |
# f'prlimit --as={self.max_memory_mb * 1024 * 1024} {_shell_command}' | |
# ) | |
# else: | |
window_command = _shell_command | |
logger.debug(f'Initializing bash session with command: {window_command}') | |
session_name = f'openhands-{self.username}-{uuid.uuid4()}' | |
self.session = self.server.new_session( | |
session_name=session_name, | |
start_directory=self.work_dir, # This parameter is supported by libtmux | |
kill_session=True, | |
x=1000, | |
y=1000, | |
) | |
# Set history limit to a large number to avoid losing history | |
# https://unix.stackexchange.com/questions/43414/unlimited-history-in-tmux | |
self.session.set_option('history-limit', str(self.HISTORY_LIMIT), _global=True) | |
self.session.history_limit = self.HISTORY_LIMIT | |
# We need to create a new pane because the initial pane's history limit is (default) 2000 | |
_initial_window = self.session.active_window | |
self.window = self.session.new_window( | |
window_name='bash', | |
window_shell=window_command, | |
start_directory=self.work_dir, # This parameter is supported by libtmux | |
) | |
self.pane = self.window.active_pane | |
logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}') | |
_initial_window.kill_window() | |
# Configure bash to use simple PS1 and disable PS2 | |
self.pane.send_keys( | |
f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""' | |
) | |
time.sleep(0.1) # Wait for command to take effect | |
self._clear_screen() | |
# Store the last command for interactive input handling | |
self.prev_status: BashCommandStatus | None = None | |
self.prev_output: str = '' | |
self._closed: bool = False | |
logger.debug(f'Bash session initialized with work dir: {self.work_dir}') | |
# Maintain the current working directory | |
self._cwd = os.path.abspath(self.work_dir) | |
self._initialized = True | |
def __del__(self) -> None: | |
"""Ensure the session is closed when the object is destroyed.""" | |
self.close() | |
def _get_pane_content(self) -> str: | |
"""Capture the current pane content and update the buffer.""" | |
content = '\n'.join( | |
map( | |
# avoid double newlines | |
lambda line: line.rstrip(), | |
self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout, | |
) | |
) | |
return content | |
def close(self) -> None: | |
"""Clean up the session.""" | |
if self._closed: | |
return | |
self.session.kill_session() | |
self._closed = True | |
def cwd(self) -> str: | |
return self._cwd | |
def _is_special_key(self, command: str) -> bool: | |
"""Check if the command is a special key.""" | |
# Special keys are of the form C-<key> | |
_command = command.strip() | |
return _command.startswith('C-') and len(_command) == 3 | |
def _clear_screen(self) -> None: | |
"""Clear the tmux pane screen and history.""" | |
self.pane.send_keys('C-l', enter=False) | |
time.sleep(0.1) | |
self.pane.cmd('clear-history') | |
def _get_command_output( | |
self, | |
command: str, | |
raw_command_output: str, | |
metadata: CmdOutputMetadata, | |
continue_prefix: str = '', | |
) -> str: | |
"""Get the command output with the previous command output removed. | |
Args: | |
command: The command that was executed. | |
raw_command_output: The raw output from the command. | |
metadata: The metadata object to store prefix/suffix in. | |
continue_prefix: The prefix to add to the command output if it's a continuation of the previous command. | |
""" | |
# remove the previous command output from the new output if any | |
if self.prev_output: | |
command_output = raw_command_output.removeprefix(self.prev_output) | |
metadata.prefix = continue_prefix | |
else: | |
command_output = raw_command_output | |
self.prev_output = raw_command_output # update current command output anyway | |
command_output = _remove_command_prefix(command_output, command) | |
return command_output.rstrip() | |
def _handle_completed_command( | |
self, command: str, pane_content: str, ps1_matches: list[re.Match] | |
) -> CmdOutputObservation: | |
is_special_key = self._is_special_key(command) | |
assert len(ps1_matches) >= 1, ( | |
f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n' | |
f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---' | |
) | |
metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1]) | |
# Special case where the previous command output is truncated due to history limit | |
# We should get the content BEFORE the last PS1 prompt | |
get_content_before_last_match = bool(len(ps1_matches) == 1) | |
# Update the current working directory if it has changed | |
if metadata.working_dir != self._cwd and metadata.working_dir: | |
self._cwd = metadata.working_dir | |
logger.debug(f'COMMAND OUTPUT: {pane_content}') | |
# Extract the command output between the two PS1 prompts | |
raw_command_output = self._combine_outputs_between_matches( | |
pane_content, | |
ps1_matches, | |
get_content_before_last_match=get_content_before_last_match, | |
) | |
if get_content_before_last_match: | |
# Count the number of lines in the truncated output | |
num_lines = len(raw_command_output.splitlines()) | |
metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n' | |
metadata.suffix = ( | |
f'\n[The command completed with exit code {metadata.exit_code}.]' | |
if not is_special_key | |
else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]' | |
) | |
command_output = self._get_command_output( | |
command, | |
raw_command_output, | |
metadata, | |
) | |
self.prev_status = BashCommandStatus.COMPLETED | |
self.prev_output = '' # Reset previous command output | |
self._ready_for_next_command() | |
return CmdOutputObservation( | |
content=command_output, | |
command=command, | |
metadata=metadata, | |
) | |
def _handle_nochange_timeout_command( | |
self, | |
command: str, | |
pane_content: str, | |
ps1_matches: list[re.Match], | |
) -> CmdOutputObservation: | |
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT | |
if len(ps1_matches) != 1: | |
logger.warning( | |
'Expected exactly one PS1 metadata block BEFORE the execution of a command, ' | |
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---' | |
) | |
raw_command_output = self._combine_outputs_between_matches( | |
pane_content, ps1_matches | |
) | |
metadata = CmdOutputMetadata() # No metadata available | |
metadata.suffix = ( | |
f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. ' | |
f'{TIMEOUT_MESSAGE_TEMPLATE}]' | |
) | |
command_output = self._get_command_output( | |
command, | |
raw_command_output, | |
metadata, | |
continue_prefix='[Below is the output of the previous command.]\n', | |
) | |
return CmdOutputObservation( | |
content=command_output, | |
command=command, | |
metadata=metadata, | |
) | |
def _handle_hard_timeout_command( | |
self, | |
command: str, | |
pane_content: str, | |
ps1_matches: list[re.Match], | |
timeout: float, | |
) -> CmdOutputObservation: | |
self.prev_status = BashCommandStatus.HARD_TIMEOUT | |
if len(ps1_matches) != 1: | |
logger.warning( | |
'Expected exactly one PS1 metadata block BEFORE the execution of a command, ' | |
f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---' | |
) | |
raw_command_output = self._combine_outputs_between_matches( | |
pane_content, ps1_matches | |
) | |
metadata = CmdOutputMetadata() # No metadata available | |
metadata.suffix = ( | |
f'\n[The command timed out after {timeout} seconds. ' | |
f'{TIMEOUT_MESSAGE_TEMPLATE}]' | |
) | |
command_output = self._get_command_output( | |
command, | |
raw_command_output, | |
metadata, | |
continue_prefix='[Below is the output of the previous command.]\n', | |
) | |
return CmdOutputObservation( | |
command=command, | |
content=command_output, | |
metadata=metadata, | |
) | |
def _ready_for_next_command(self) -> None: | |
"""Reset the content buffer for a new command.""" | |
# Clear the current content | |
self._clear_screen() | |
def _combine_outputs_between_matches( | |
self, | |
pane_content: str, | |
ps1_matches: list[re.Match], | |
get_content_before_last_match: bool = False, | |
) -> str: | |
"""Combine all outputs between PS1 matches. | |
Args: | |
pane_content: The full pane content containing PS1 prompts and command outputs | |
ps1_matches: List of regex matches for PS1 prompts | |
get_content_before_last_match: when there's only one PS1 match, whether to get | |
the content before the last PS1 prompt (True) or after the last PS1 prompt (False) | |
Returns: | |
Combined string of all outputs between matches | |
""" | |
if len(ps1_matches) == 1: | |
if get_content_before_last_match: | |
# The command output is the content before the last PS1 prompt | |
return pane_content[: ps1_matches[0].start()] | |
else: | |
# The command output is the content after the last PS1 prompt | |
return pane_content[ps1_matches[0].end() + 1 :] | |
elif len(ps1_matches) == 0: | |
return pane_content | |
combined_output = '' | |
for i in range(len(ps1_matches) - 1): | |
# Extract content between current and next PS1 prompt | |
output_segment = pane_content[ | |
ps1_matches[i].end() + 1 : ps1_matches[i + 1].start() | |
] | |
combined_output += output_segment + '\n' | |
# Add the content after the last PS1 prompt | |
combined_output += pane_content[ps1_matches[-1].end() + 1 :] | |
logger.debug(f'COMBINED OUTPUT: {combined_output}') | |
return combined_output | |
def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation: | |
"""Execute a command in the bash session.""" | |
if not self._initialized: | |
raise RuntimeError('Bash session is not initialized') | |
# Strip the command of any leading/trailing whitespace | |
logger.debug(f'RECEIVED ACTION: {action}') | |
command = action.command.strip() | |
is_input: bool = action.is_input | |
# If the previous command is not completed, we need to check if the command is empty | |
if self.prev_status not in { | |
BashCommandStatus.CONTINUE, | |
BashCommandStatus.NO_CHANGE_TIMEOUT, | |
BashCommandStatus.HARD_TIMEOUT, | |
}: | |
if command == '': | |
return CmdOutputObservation( | |
content='ERROR: No previous running command to retrieve logs from.', | |
command='', | |
metadata=CmdOutputMetadata(), | |
) | |
if is_input: | |
return CmdOutputObservation( | |
content='ERROR: No previous running command to interact with.', | |
command='', | |
metadata=CmdOutputMetadata(), | |
) | |
# Check if the command is a single command or multiple commands | |
splited_commands = split_bash_commands(command) | |
if len(splited_commands) > 1: | |
return ErrorObservation( | |
content=( | |
f'ERROR: Cannot execute multiple commands at once.\n' | |
f'Please run each command separately OR chain them into a single command via && or ;\n' | |
f'Provided commands:\n{"\n".join(f"({i + 1}) {cmd}" for i, cmd in enumerate(splited_commands))}' | |
) | |
) | |
# Get initial state before sending command | |
initial_pane_output = self._get_pane_content() | |
initial_ps1_matches = CmdOutputMetadata.matches_ps1_metadata( | |
initial_pane_output | |
) | |
initial_ps1_count = len(initial_ps1_matches) | |
logger.debug(f'Initial PS1 count: {initial_ps1_count}') | |
start_time = time.time() | |
last_change_time = start_time | |
last_pane_output = ( | |
initial_pane_output # Use initial output as the starting point | |
) | |
# When prev command is still running, and we are trying to send a new command | |
if ( | |
self.prev_status | |
in { | |
BashCommandStatus.HARD_TIMEOUT, | |
BashCommandStatus.NO_CHANGE_TIMEOUT, | |
} | |
and not last_pane_output.rstrip().endswith( | |
CMD_OUTPUT_PS1_END.rstrip() | |
) # prev command is not completed | |
and not is_input | |
and command != '' # not input and not empty command | |
): | |
_ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output) | |
# Use initial_ps1_matches if _ps1_matches is empty, otherwise use _ps1_matches | |
# This handles the case where the prompt might be scrolled off screen but existed before | |
current_matches_for_output = ( | |
_ps1_matches if _ps1_matches else initial_ps1_matches | |
) | |
raw_command_output = self._combine_outputs_between_matches( | |
last_pane_output, current_matches_for_output | |
) | |
metadata = CmdOutputMetadata() # No metadata available | |
metadata.suffix = ( | |
f'\n[Your command "{command}" is NOT executed. ' | |
f'The previous command is still running - You CANNOT send new commands until the previous command is completed. ' | |
'By setting `is_input` to `true`, you can interact with the current process: ' | |
"You may wait longer to see additional output of the previous command by sending empty command '', " | |
'send other commands to interact with the current process, ' | |
'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]' | |
) | |
logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}') | |
command_output = self._get_command_output( | |
command, | |
raw_command_output, | |
metadata, | |
continue_prefix='[Below is the output of the previous command.]\n', | |
) | |
return CmdOutputObservation( | |
command=command, | |
content=command_output, | |
metadata=metadata, | |
) | |
# Send actual command/inputs to the pane | |
if command != '': | |
is_special_key = self._is_special_key(command) | |
if is_input: | |
logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}') | |
self.pane.send_keys( | |
command, | |
enter=not is_special_key, | |
) | |
else: | |
# convert command to raw string | |
command = escape_bash_special_chars(command) | |
logger.debug(f'SENDING COMMAND: {command!r}') | |
self.pane.send_keys( | |
command, | |
enter=not is_special_key, | |
) | |
# Loop until the command completes or times out | |
while should_continue(): | |
_start_time = time.time() | |
logger.debug(f'GETTING PANE CONTENT at {_start_time}') | |
cur_pane_output = self._get_pane_content() | |
logger.debug( | |
f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds' | |
) | |
logger.debug(f'BEGIN OF PANE CONTENT: {cur_pane_output.split("\n")[:10]}') | |
logger.debug(f'END OF PANE CONTENT: {cur_pane_output.split("\n")[-10:]}') | |
ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output) | |
current_ps1_count = len(ps1_matches) | |
if cur_pane_output != last_pane_output: | |
last_pane_output = cur_pane_output | |
last_change_time = time.time() | |
logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}') | |
# 1) Execution completed: | |
# Condition 1: A new prompt has appeared since the command started. | |
# Condition 2: The prompt count hasn't increased (potentially because the initial one scrolled off), | |
# BUT the *current* visible pane ends with a prompt, indicating completion. | |
if ( | |
current_ps1_count > initial_ps1_count | |
or cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip()) | |
): | |
return self._handle_completed_command( | |
command, | |
pane_content=cur_pane_output, | |
ps1_matches=ps1_matches, | |
) | |
# Timeout checks should only trigger if a new prompt hasn't appeared yet. | |
# 2) Execution timed out since there's no change in output | |
# for a while (self.NO_CHANGE_TIMEOUT_SECONDS) | |
# We ignore this if the command is *blocking* | |
time_since_last_change = time.time() - last_change_time | |
logger.debug( | |
f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}. Action blocking: {action.blocking}' | |
) | |
if ( | |
not action.blocking | |
and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS | |
): | |
return self._handle_nochange_timeout_command( | |
command, | |
pane_content=cur_pane_output, | |
ps1_matches=ps1_matches, | |
) | |
# 3) Execution timed out due to hard timeout | |
elapsed_time = time.time() - start_time | |
logger.debug( | |
f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {elapsed_time:.2f}' | |
) | |
if action.timeout and elapsed_time >= action.timeout: | |
logger.debug('Hard timeout triggered.') | |
return self._handle_hard_timeout_command( | |
command, | |
pane_content=cur_pane_output, | |
ps1_matches=ps1_matches, | |
timeout=action.timeout, | |
) | |
logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll') | |
time.sleep(self.POLL_INTERVAL) | |
raise RuntimeError('Bash session was likely interrupted...') | |