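# NOTE(review): the next three lines appear to be hosting-page residue
# (uploader, commit message, revision), not part of the module.
# ar08's picture
# Upload 1040 files
# 246d201 verified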
import os
import re
import time
import traceback
import uuid
from enum import Enum
import bashlex
import libtmux
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import CmdRunAction
from openhands.events.observation import ErrorObservation
from openhands.events.observation.commands import (
CMD_OUTPUT_PS1_END,
CmdOutputMetadata,
CmdOutputObservation,
)
from openhands.utils.shutdown_listener import should_continue
def split_bash_commands(commands):
    """Split a bash script into its top-level commands.

    Args:
        commands: The raw command text.

    Returns:
        ``['']`` when the input is blank; ``[commands]`` unchanged when
        bashlex cannot parse it; otherwise one right-stripped string per
        top-level node reported by ``bashlex.parse``. Text lying between two
        nodes, or after the last one, is appended (right-stripped) to the
        preceding command.
    """
    if not commands.strip():
        return ['']

    try:
        top_level_nodes = bashlex.parse(commands)
    except (bashlex.errors.ParsingError, NotImplementedError):
        # Unparseable input is handed back untouched as a single command.
        logger.debug(
            f'Failed to parse bash commands\n'
            f'[input]: {commands}\n'
            f'[warning]: {traceback.format_exc()}\n'
            f'The original command will be returned as is.'
        )
        return [commands]

    pieces: list[str] = []
    cursor = 0  # index just past the last node consumed so far
    for ast_node in top_level_nodes:
        begin, finish = ast_node.pos

        if begin > cursor:
            # Text sitting between the previous node and this one.
            gap = commands[cursor:begin]
            logger.debug(f'BASH PARSING between: {gap}')
            trimmed_gap = gap.rstrip()
            if pieces:
                pieces[-1] += trimmed_gap
            elif gap.strip():
                # Leading non-blank text before the first node; not expected.
                pieces.append(trimmed_gap)

        # Slice the original text so the command keeps its formatting.
        segment = commands[begin:finish].rstrip()
        logger.debug(f'BASH PARSING command: {segment}')
        pieces.append(segment)
        cursor = finish

    tail = commands[cursor:].rstrip()
    logger.debug(f'BASH PARSING remaining: {tail}')

    if cursor >= len(commands):
        # Every character was covered by a node; nothing left to attach.
        return pieces

    if pieces:
        # Trailing text belongs to the last command.
        pieces[-1] += tail
        logger.debug(f'BASH PARSING result[-1] += remaining: {pieces[-1]}')
    elif tail:
        pieces.append(tail)
        logger.debug(f'BASH PARSING result.append(remaining): {pieces[-1]}')
    return pieces
def escape_bash_special_chars(command: str) -> str:
    r"""
    Double the backslash in front of ``;``, ``&``, ``|``, ``>`` and ``<``.

    The command is parsed with bashlex and rebuilt piece by piece. Sequences
    such as \;, \|, \& found in unquoted words, or in the text between words,
    get their backslash doubled. Words that are fully wrapped in quotes,
    ``$(...)`` or backticks, and heredoc bodies, are copied through verbatim.

    Returns ``''`` for blank input, and the original command unchanged when
    bashlex cannot parse it.
    """
    if command.strip() == '':
        return ''

    def double_backslashes(text):
        # Same substitution everywhere: \X -> \\X for X in ; & | > <
        return re.sub(r'\\([;&|><])', r'\\\\\1', text)

    # (opening, closing) delimiters of words that must be kept as-is.
    verbatim_delimiters = (('"', '"'), ("'", "'"), ('$(', ')'), ('`', '`'))

    pieces = []
    cursor = 0  # index in `command` up to which text has been emitted

    def visit_node(node):
        nonlocal cursor
        heredoc = getattr(node, 'heredoc', None)

        if node.kind == 'redirect' and heredoc is not None:
            # Heredoc redirect: emit the text leading up to it, the
            # redirect operator/marker, then the heredoc body, all verbatim.
            redirect_start = node.pos[0]
            body_start, body_end = heredoc.pos[0], heredoc.pos[1]
            pieces.append(command[cursor:redirect_start])
            pieces.append(command[redirect_start:body_start])
            pieces.append(command[body_start:body_end])
            cursor = node.pos[1]
            return

        if node.kind == 'word':
            word_start, word_end = node.pos[0], node.pos[1]
            token = command[word_start:word_end]
            # Text between the previous piece and this word is always escaped.
            pieces.append(double_backslashes(command[cursor:word_start]))

            keep_verbatim = any(
                token.startswith(opening) and token.endswith(closing)
                for opening, closing in verbatim_delimiters
            )
            pieces.append(token if keep_verbatim else double_backslashes(token))
            cursor = word_end
            return

        # Any other node kind: descend into its children, if it has any.
        if hasattr(node, 'parts'):
            for child in node.parts:
                visit_node(child)

    try:
        ast_roots = list(bashlex.parse(command))
        for root in ast_roots:
            root_start = root.pos[0]
            pieces.append(double_backslashes(command[cursor:root_start]))
            cursor = root_start
            visit_node(root)

        # Whatever follows the last emitted piece is copied unchanged.
        pieces.append(command[cursor:])
        return ''.join(pieces)
    except (bashlex.errors.ParsingError, NotImplementedError):
        logger.debug(
            f'Failed to parse bash commands for special characters escape\n'
            f'[input]: {command}\n'
            f'[warning]: {traceback.format_exc()}\n'
            f'The original command will be returned as is.'
        )
        return command
class BashCommandStatus(Enum):
    """Status that ``BashSession`` records for the last command it handled."""

    # Treated by BashSession.execute as "a command is still running",
    # together with the two timeout states below.
    CONTINUE = 'continue'
    # Set once the pane content ends with the PS1 end marker.
    COMPLETED = 'completed'
    # Set when the pane output stayed unchanged for the no-change timeout.
    NO_CHANGE_TIMEOUT = 'no_change_timeout'
    # Set when the action's own timeout elapsed.
    HARD_TIMEOUT = 'hard_timeout'
def _remove_command_prefix(command_output: str, command: str) -> str:
return command_output.lstrip().removeprefix(command.lstrip()).lstrip()
class BashSession:
    """Run bash commands inside a dedicated tmux session and report their output.

    ``initialize()`` must be called before ``execute()``. A command is treated
    as finished once the captured pane content ends with the PS1 end marker
    (``CMD_OUTPUT_PS1_END``); until then ``execute()`` polls the pane and may
    return early on a no-change timeout or on the action's hard timeout.
    """

    # Seconds slept between two polls of the pane inside execute().
    POLL_INTERVAL = 0.5
    # Value applied to the tmux `history-limit` option in initialize().
    HISTORY_LIMIT = 10_000
    # Prompt string exported as PS1 (through PROMPT_COMMAND) in initialize().
    PS1 = CmdOutputMetadata.to_ps1_prompt()
    # Prefix attached to observations that continue a still-running command.
    _CONTINUE_PREFIX = '[Below is the output of the previous command.]\n'

    def __init__(
        self,
        work_dir: str,
        username: str | None = None,
        no_change_timeout_seconds: int = 30,
    ):
        """Store the configuration; no tmux resources are created here.

        Args:
            work_dir: Start directory for the tmux session and window.
            username: When set, the window runs ``su <username> -`` instead of
                ``/bin/bash``.
            no_change_timeout_seconds: Seconds without any pane change after
                which a non-blocking command is reported as timed out.
        """
        self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
        self.work_dir = work_dir
        self.username = username
        self._initialized = False
        # Defined here (not only in initialize()) so close()/__del__ are safe
        # on an instance whose initialize() never ran or failed part-way.
        self._closed: bool = False

    def initialize(self):
        """Create the tmux session/pane and configure the shell prompt."""
        self.server = libtmux.Server()
        window_command = '/bin/bash'
        if self.username:
            # This starts a non-login (new) shell for the given user
            window_command = f'su {self.username} -'

        session_name = f'openhands-{self.username}-{uuid.uuid4()}'
        self.session = self.server.new_session(
            session_name=session_name,
            window_name='bash',
            window_command=window_command,
            start_directory=self.work_dir,
            kill_session=True,
            x=1000,
            y=1000,
        )

        # Set history limit to a large number to avoid losing history
        # https://unix.stackexchange.com/questions/43414/unlimited-history-in-tmux
        self.session.set_option('history-limit', str(self.HISTORY_LIMIT), _global=True)
        self.session.history_limit = self.HISTORY_LIMIT

        # We need to create a new pane because the initial pane's history limit is (default) 2000
        _initial_window = self.session.attached_window
        self.window = self.session.new_window(
            window_shell=window_command,
            start_directory=self.work_dir,
        )
        self.pane = self.window.attached_pane
        logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}')
        _initial_window.kill_window()

        # Configure bash to use simple PS1 and disable PS2
        self.pane.send_keys(
            f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""'
        )
        time.sleep(0.1)  # Wait for command to take effect
        self._clear_screen()

        # Store the last command for interactive input handling
        self.prev_status: BashCommandStatus | None = None
        self.prev_output: str = ''
        self._closed = False
        logger.debug(f'Bash session initialized with work dir: {self.work_dir}')

        # Maintain the current working directory
        self._cwd = os.path.abspath(self.work_dir)
        self._initialized = True

    def __del__(self):
        """Ensure the session is closed when the object is destroyed."""
        self.close()

    def _get_pane_content(self) -> str:
        """Capture the pane via tmux ``capture-pane`` and return it as one string.

        Each captured line is right-stripped before joining with newlines.
        """
        captured_lines = self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout
        # rstrip each line to avoid double newlines
        return '\n'.join(line.rstrip() for line in captured_lines)

    def close(self):
        """Kill the tmux session, if one was created. Safe to call repeatedly.

        Also safe on an instance that was never (or only partially)
        initialized: missing ``_closed``/``session`` attributes are tolerated
        so that ``__del__`` does not raise AttributeError.
        """
        if getattr(self, '_closed', False):
            return
        session = getattr(self, 'session', None)
        if session is not None:
            session.kill_session()
        self._closed = True

    @property
    def cwd(self):
        """Working directory last reported by the shell prompt metadata."""
        return self._cwd

    def _is_special_key(self, command: str) -> bool:
        """Check if the command is a special key."""
        # Special keys are of the form C-<key>
        _command = command.strip()
        return _command.startswith('C-') and len(_command) == 3

    def _clear_screen(self):
        """Clear the tmux pane screen and history."""
        self.pane.send_keys('C-l', enter=False)
        time.sleep(0.1)
        self.pane.cmd('clear-history')

    def _get_command_output(
        self,
        command: str,
        raw_command_output: str,
        metadata: CmdOutputMetadata,
        continue_prefix: str = '',
    ) -> str:
        """Get the command output with the previous command output removed.

        Args:
            command: The command that was executed.
            raw_command_output: The raw output from the command.
            metadata: The metadata object to store prefix/suffix in.
            continue_prefix: The prefix to add to the command output if it's a
                continuation of the previous command.

        Returns:
            The new output, without the echoed command and right-stripped.
        """
        # remove the previous command output from the new output if any
        if self.prev_output:
            command_output = raw_command_output.removeprefix(self.prev_output)
            metadata.prefix = continue_prefix
        else:
            command_output = raw_command_output
        self.prev_output = raw_command_output  # update current command output anyway
        command_output = _remove_command_prefix(command_output, command)
        return command_output.rstrip()

    def _handle_completed_command(
        self, command: str, pane_content: str, ps1_matches: list[re.Match]
    ) -> CmdOutputObservation:
        """Build the observation for a command whose prompt has returned.

        Updates the tracked working directory from the last PS1 metadata
        block, marks the session as COMPLETED, resets the remembered output
        and clears the pane for the next command.
        """
        is_special_key = self._is_special_key(command)
        assert len(ps1_matches) >= 1, (
            f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n'
            f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---'
        )
        metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1])

        # Special case where the previous command output is truncated due to history limit
        # We should get the content BEFORE the last PS1 prompt
        get_content_before_last_match = len(ps1_matches) == 1

        # Update the current working directory if it has changed
        if metadata.working_dir != self._cwd and metadata.working_dir:
            self._cwd = metadata.working_dir

        logger.debug(f'COMMAND OUTPUT: {pane_content}')
        # Extract the command output between the two PS1 prompts
        raw_command_output = self._combine_outputs_between_matches(
            pane_content,
            ps1_matches,
            get_content_before_last_match=get_content_before_last_match,
        )

        if get_content_before_last_match:
            # Count the number of lines in the truncated output
            num_lines = len(raw_command_output.splitlines())
            metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n'

        metadata.suffix = (
            f'\n[The command completed with exit code {metadata.exit_code}.]'
            if not is_special_key
            else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]'
        )
        command_output = self._get_command_output(
            command,
            raw_command_output,
            metadata,
        )
        self.prev_status = BashCommandStatus.COMPLETED
        self.prev_output = ''  # Reset previous command output
        self._ready_for_next_command()
        return CmdOutputObservation(
            content=command_output,
            command=command,
            metadata=metadata,
        )

    def _build_timeout_observation(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
        suffix: str,
    ) -> CmdOutputObservation:
        """Shared tail of both timeout handlers: output so far plus ``suffix``."""
        if len(ps1_matches) != 1:
            logger.warning(
                'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
                f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
            )
        raw_command_output = self._combine_outputs_between_matches(
            pane_content, ps1_matches
        )
        metadata = CmdOutputMetadata()  # No metadata available
        metadata.suffix = suffix
        command_output = self._get_command_output(
            command,
            raw_command_output,
            metadata,
            continue_prefix=self._CONTINUE_PREFIX,
        )
        return CmdOutputObservation(
            content=command_output,
            command=command,
            metadata=metadata,
        )

    def _handle_nochange_timeout_command(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
    ) -> CmdOutputObservation:
        """Report a command whose output has not changed for the no-change timeout."""
        self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
        suffix = (
            f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
            "You may wait longer to see additional output by sending empty command '', "
            'send other commands to interact with the current process, '
            'or send keys to interrupt/kill the command.]'
        )
        return self._build_timeout_observation(
            command, pane_content, ps1_matches, suffix
        )

    def _handle_hard_timeout_command(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
        timeout: float,
    ) -> CmdOutputObservation:
        """Report a command that exceeded the action's hard ``timeout`` (seconds)."""
        self.prev_status = BashCommandStatus.HARD_TIMEOUT
        suffix = (
            f'\n[The command timed out after {timeout} seconds. '
            "You may wait longer to see additional output by sending empty command '', "
            'send other commands to interact with the current process, '
            'or send keys to interrupt/kill the command.]'
        )
        return self._build_timeout_observation(
            command, pane_content, ps1_matches, suffix
        )

    def _ready_for_next_command(self):
        """Reset the content buffer for a new command."""
        # Clear the current content
        self._clear_screen()

    def _combine_outputs_between_matches(
        self,
        pane_content: str,
        ps1_matches: list[re.Match],
        get_content_before_last_match: bool = False,
    ) -> str:
        """Combine all outputs between PS1 matches.

        Args:
            pane_content: The full pane content containing PS1 prompts and command outputs
            ps1_matches: List of regex matches for PS1 prompts
            get_content_before_last_match: when there's only one PS1 match, whether to get
                the content before the last PS1 prompt (True) or after the last PS1 prompt (False)

        Returns:
            Combined string of all outputs between matches
        """
        if not ps1_matches:
            return pane_content

        if len(ps1_matches) == 1:
            only_match = ps1_matches[0]
            if get_content_before_last_match:
                # The command output is the content before the last PS1 prompt
                return pane_content[: only_match.start()]
            # The command output is the content after the last PS1 prompt
            return pane_content[only_match.end() + 1 :]

        # Content between each prompt and the next one, newline-terminated.
        combined_output = ''.join(
            pane_content[current.end() + 1 : following.start()] + '\n'
            for current, following in zip(ps1_matches, ps1_matches[1:])
        )
        logger.debug(f'COMBINED OUTPUT: {combined_output}')
        return combined_output

    def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
        """Execute a command in the bash session.

        Args:
            action: The command to run. ``action.is_input`` marks it as input
                for an already running process; ``action.blocking`` disables
                the no-change timeout; ``action.timeout`` is the hard timeout.

        Returns:
            A ``CmdOutputObservation`` with the (possibly partial) output, or
            an ``ErrorObservation`` when several commands were given at once.

        Raises:
            RuntimeError: If the session is not initialized, or if the polling
                loop stops because ``should_continue()`` became false.
        """
        if not self._initialized:
            raise RuntimeError('Bash session is not initialized')

        # Strip the command of any leading/trailing whitespace
        logger.debug(f'RECEIVED ACTION: {action}')
        command = action.command.strip()
        is_input: bool = action.is_input

        # If the previous command is not completed, we need to check if the command is empty
        if self.prev_status not in {
            BashCommandStatus.CONTINUE,
            BashCommandStatus.NO_CHANGE_TIMEOUT,
            BashCommandStatus.HARD_TIMEOUT,
        }:
            if command == '':
                return CmdOutputObservation(
                    content='ERROR: No previous running command to retrieve logs from.',
                    command='',
                    metadata=CmdOutputMetadata(),
                )
            if is_input:
                return CmdOutputObservation(
                    content='ERROR: No previous running command to interact with.',
                    command='',
                    metadata=CmdOutputMetadata(),
                )

        # Check if the command is a single command or multiple commands
        splited_commands = split_bash_commands(command)
        if len(splited_commands) > 1:
            # Built outside the f-string: a backslash inside a replacement
            # field is a SyntaxError before Python 3.12.
            numbered_commands = '\n'.join(
                f'({i}) {cmd}' for i, cmd in enumerate(splited_commands, start=1)
            )
            return ErrorObservation(
                content=(
                    f'ERROR: Cannot execute multiple commands at once.\n'
                    f'Please run each command separately OR chain them into a single command via && or ;\n'
                    f'Provided commands:\n{numbered_commands}'
                )
            )

        start_time = time.time()
        last_change_time = start_time
        last_pane_output = self._get_pane_content()

        # When prev command is still running, and we are trying to send a new command
        if (
            self.prev_status
            in {
                BashCommandStatus.HARD_TIMEOUT,
                BashCommandStatus.NO_CHANGE_TIMEOUT,
            }
            and not last_pane_output.endswith(
                CMD_OUTPUT_PS1_END
            )  # prev command is not completed
            and not is_input
            and command != ''  # not input and not empty command
        ):
            _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
            raw_command_output = self._combine_outputs_between_matches(
                last_pane_output, _ps1_matches
            )
            metadata = CmdOutputMetadata()  # No metadata available
            metadata.suffix = (
                f'\n[Your command "{command}" is NOT executed. '
                f'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
                'By setting `is_input` to `true`, you can interact with the current process: '
                "You may wait longer to see additional output of the previous command by sending empty command '', "
                'send other commands to interact with the current process, '
                'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
            )
            logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
            command_output = self._get_command_output(
                command,
                raw_command_output,
                metadata,
                continue_prefix=self._CONTINUE_PREFIX,
            )
            return CmdOutputObservation(
                command=command,
                content=command_output,
                metadata=metadata,
            )

        # Send actual command/inputs to the pane
        if command != '':
            # Decided on the unescaped text; special keys are sent without Enter.
            is_special_key = self._is_special_key(command)
            if is_input:
                logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
            else:
                # convert command to raw string
                command = escape_bash_special_chars(command)
                logger.debug(f'SENDING COMMAND: {command!r}')
            self.pane.send_keys(
                command,
                enter=not is_special_key,
            )

        # Loop until the command completes or times out
        while should_continue():
            _start_time = time.time()
            logger.debug(f'GETTING PANE CONTENT at {_start_time}')
            cur_pane_output = self._get_pane_content()
            logger.debug(
                f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
            )
            # Split once, outside the f-strings (no backslash in replacement
            # fields, which older Python versions reject).
            pane_lines = cur_pane_output.split('\n')
            logger.debug(f'BEGIN OF PANE CONTENT: {pane_lines[:10]}')
            logger.debug(f'END OF PANE CONTENT: {pane_lines[-10:]}')
            ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)

            if cur_pane_output != last_pane_output:
                last_pane_output = cur_pane_output
                last_change_time = time.time()
                logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}')

            # 1) Execution completed
            # if the last command output contains the end marker
            if cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip()):
                return self._handle_completed_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                )

            # 2) Execution timed out since there's no change in output
            # for a while (self.NO_CHANGE_TIMEOUT_SECONDS)
            # We ignore this if the command is *blocking
            time_since_last_change = time.time() - last_change_time
            logger.debug(
                f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}. Action blocking: {action.blocking}'
            )
            if (
                not action.blocking
                and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS
            ):
                return self._handle_nochange_timeout_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                )

            # 3) Execution timed out due to hard timeout
            logger.debug(
                f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {time.time() - start_time}'
            )
            if action.timeout and time.time() - start_time >= action.timeout:
                return self._handle_hard_timeout_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                    timeout=action.timeout,
                )

            logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll')
            time.sleep(self.POLL_INTERVAL)
        raise RuntimeError('Bash session was likely interrupted...')