|
import os
|
|
import re
|
|
import time
|
|
import traceback
|
|
import uuid
|
|
from enum import Enum
|
|
|
|
import bashlex
|
|
import libtmux
|
|
|
|
from openhands.core.logger import openhands_logger as logger
|
|
from openhands.events.action import CmdRunAction
|
|
from openhands.events.observation import ErrorObservation
|
|
from openhands.events.observation.commands import (
|
|
CMD_OUTPUT_PS1_END,
|
|
CmdOutputMetadata,
|
|
CmdOutputObservation,
|
|
)
|
|
from openhands.utils.shutdown_listener import should_continue
|
|
|
|
|
|
def split_bash_commands(commands: str) -> list[str]:
    """Split a shell snippet into its top-level commands using bashlex.

    Text that lies between two parsed nodes (or after the last one) is
    glued onto the preceding command with trailing whitespace removed, so
    no part of the input is silently dropped.

    Args:
        commands: Raw shell text, possibly containing several commands.

    Returns:
        ``['']`` for blank input; ``[commands]`` unchanged when bashlex
        cannot parse the text; otherwise one string per top-level node.
    """
    if not commands.strip():
        return ['']
    try:
        parsed = bashlex.parse(commands)
    except (bashlex.errors.ParsingError, NotImplementedError):
        logger.debug(
            f'Failed to parse bash commands\n'
            f'[input]: {commands}\n'
            f'[warning]: {traceback.format_exc()}\n'
            f'The original command will be returned as is.'
        )
        # Parsing failed: hand the text back untouched rather than guess.
        return [commands]

    result: list[str] = []
    last_end = 0

    for node in parsed:
        start, end = node.pos

        # Keep any text sitting between the previous node and this one.
        if start > last_end:
            between = commands[last_end:start]
            logger.debug(f'BASH PARSING between: {between}')
            if result:
                result[-1] += between.rstrip()
            elif between.strip():
                # Non-blank text before the very first node; kept as its
                # own entry so it is not lost.
                result.append(between.rstrip())

        command = commands[start:end].rstrip()
        logger.debug(f'BASH PARSING command: {command}')
        result.append(command)

        last_end = end

    # Whatever follows the last parsed node belongs to the last command.
    remaining = commands[last_end:].rstrip()
    logger.debug(f'BASH PARSING remaining: {remaining}')
    if last_end < len(commands) and result:
        result[-1] += remaining
        logger.debug(f'BASH PARSING result[-1] += remaining: {result[-1]}')
    elif last_end < len(commands):
        if remaining:
            result.append(remaining)
            logger.debug(f'BASH PARSING result.append(remaining): {result[-1]}')
    return result
|
|
|
|
|
|
def escape_bash_special_chars(command: str) -> str:
    r"""Double the backslash in front of shell operators outside quoted words.

    Sequences such as \;, \|, \&, \> and \< are rewritten with two
    backslashes. Words that are wrapped in double quotes, single quotes,
    ``$(...)`` or backticks, and heredoc redirects, are copied verbatim.

    Args:
        command: The shell command text.

    Returns:
        ``''`` for blank input; the rewritten command; or the original
        command unchanged when bashlex cannot parse it.
    """
    if command.strip() == '':
        return ''

    # One pattern shared by every escaping site below.
    operator_escape = re.compile(r'\\([;&|><])')

    def escape_operators(text: str) -> str:
        return operator_escape.sub(r'\\\\\1', text)

    try:
        parts: list[str] = []
        last_pos = 0

        def visit_node(node) -> None:
            nonlocal last_pos
            heredoc = getattr(node, 'heredoc', None)
            if node.kind == 'redirect' and heredoc is not None:
                # Heredoc: copy the gap, the redirect marker and the heredoc
                # body exactly as written (no escaping).
                parts.append(command[last_pos : node.pos[0]])
                parts.append(command[node.pos[0] : heredoc.pos[0]])
                parts.append(command[heredoc.pos[0] : heredoc.pos[1]])
                last_pos = node.pos[1]
                return

            if node.kind == 'word':
                gap = command[last_pos : node.pos[0]]
                word_text = command[node.pos[0] : node.pos[1]]

                # Text between words is always escaped.
                parts.append(escape_operators(gap))

                is_wrapped = (
                    (word_text.startswith('"') and word_text.endswith('"'))
                    or (word_text.startswith("'") and word_text.endswith("'"))
                    or (word_text.startswith('$(') and word_text.endswith(')'))
                    or (word_text.startswith('`') and word_text.endswith('`'))
                )
                # Quoted strings and command substitutions stay verbatim.
                parts.append(word_text if is_wrapped else escape_operators(word_text))

                last_pos = node.pos[1]
                return

            # Any other node kind: descend into its children, if it has any.
            if hasattr(node, 'parts'):
                for part in node.parts:
                    visit_node(part)

        for node in bashlex.parse(command):
            parts.append(escape_operators(command[last_pos : node.pos[0]]))
            last_pos = node.pos[0]
            visit_node(node)

        # Trailing text after the last visited node is copied as-is.
        parts.append(command[last_pos:])
        return ''.join(parts)
    except (bashlex.errors.ParsingError, NotImplementedError):
        logger.debug(
            f'Failed to parse bash commands for special characters escape\n'
            f'[input]: {command}\n'
            f'[warning]: {traceback.format_exc()}\n'
            f'The original command will be returned as is.'
        )
        return command
|
|
|
|
|
|
class BashCommandStatus(Enum):
    """Outcome of the most recent command handled by a bash session."""

    # Treated by BashSession.execute as "a command is still in progress";
    # not assigned anywhere in the visible code.
    CONTINUE = 'continue'
    # The pane ended with the PS1 end marker, i.e. the prompt came back.
    COMPLETED = 'completed'
    # The pane content stopped changing for the configured number of seconds.
    NO_CHANGE_TIMEOUT = 'no_change_timeout'
    # The action's own timeout elapsed before the prompt came back.
    HARD_TIMEOUT = 'hard_timeout'
|
|
|
|
|
|
def _remove_command_prefix(command_output: str, command: str) -> str:
    """Strip an echoed command from the start of captured output.

    Leading whitespace is ignored on both arguments before the prefix is
    compared, and leading whitespace left after the removal is dropped too.
    When the output does not start with the command, only the leading
    whitespace is removed.
    """
    return command_output.lstrip().removeprefix(command.lstrip()).lstrip()
|
|
|
|
|
|
class BashSession:
    """Run shell commands inside a tmux pane and report what the pane shows.

    Commands are typed into the pane with ``send_keys``; the pane text is
    then polled with ``capture-pane`` until the PS1 metadata marker shows
    up again (command finished) or one of the timeouts fires.
    """

    # Seconds slept between two polls of the pane content.
    POLL_INTERVAL = 0.5
    # Value passed to tmux as the ``history-limit`` option.
    HISTORY_LIMIT = 10_000
    # Prompt string exported as PS1 inside the pane by ``initialize``.
    PS1 = CmdOutputMetadata.to_ps1_prompt()

    def __init__(
        self,
        work_dir: str,
        username: str | None = None,
        no_change_timeout_seconds: int = 30,
    ):
        """Store the configuration; no tmux session is created here.

        Args:
            work_dir: Start directory for the tmux session and window.
            username: When set, the window runs ``su <username> -`` instead
                of ``/bin/bash``.
            no_change_timeout_seconds: Seconds without any pane change after
                which a non-blocking command is reported as timed out.
        """
        self.NO_CHANGE_TIMEOUT_SECONDS = no_change_timeout_seconds
        self.work_dir = work_dir
        self.username = username
        self._initialized = False
        # Set here (not only in initialize) so close()/__del__ are safe on an
        # object whose initialize() was never called.
        self._closed: bool = False

    def initialize(self) -> None:
        """Create the tmux session/window/pane and configure the prompt."""
        self.server = libtmux.Server()
        window_command = '/bin/bash'
        if self.username:
            # Run the shell as the requested user.
            window_command = f'su {self.username} -'

        session_name = f'openhands-{self.username}-{uuid.uuid4()}'
        self.session = self.server.new_session(
            session_name=session_name,
            window_name='bash',
            window_command=window_command,
            start_directory=self.work_dir,
            kill_session=True,
            x=1000,
            y=1000,
        )

        # Raise the scrollback limit, then open a fresh window and drop the
        # initial one. NOTE(review): presumably because tmux applies
        # history-limit only to windows created afterwards - confirm against
        # the tmux manual.
        self.session.set_option('history-limit', str(self.HISTORY_LIMIT), _global=True)
        self.session.history_limit = self.HISTORY_LIMIT

        _initial_window = self.session.attached_window
        self.window = self.session.new_window(
            window_shell=window_command,
            start_directory=self.work_dir,
        )
        self.pane = self.window.attached_pane
        logger.debug(f'pane: {self.pane}; history_limit: {self.session.history_limit}')
        _initial_window.kill_window()

        # Make bash re-export the metadata PS1 before every prompt and blank
        # out the continuation prompt (PS2).
        self.pane.send_keys(
            f'export PROMPT_COMMAND=\'export PS1="{self.PS1}"\'; export PS2=""'
        )
        time.sleep(0.1)
        self._clear_screen()

        # Bookkeeping about the previously handled command.
        self.prev_status: BashCommandStatus | None = None
        self.prev_output: str = ''
        self._closed = False
        logger.debug(f'Bash session initialized with work dir: {self.work_dir}')

        # Last known working directory; refreshed from PS1 metadata later.
        self._cwd = os.path.abspath(self.work_dir)
        self._initialized = True

    def __del__(self):
        """Ensure the session is closed when the object is destroyed."""
        self.close()

    def _get_pane_content(self) -> str:
        """Return the full pane history, each line right-stripped, joined by newlines."""
        captured_lines = self.pane.cmd('capture-pane', '-J', '-pS', '-').stdout
        return '\n'.join(line.rstrip() for line in captured_lines)

    def close(self) -> None:
        """Kill the tmux session once; later calls are no-ops.

        Safe to call even when ``initialize`` never ran (no session exists).
        """
        if self._closed:
            return
        session = getattr(self, 'session', None)
        if session is not None:
            session.kill_session()
        self._closed = True

    @property
    def cwd(self):
        """Working directory last recorded for the session."""
        return self._cwd

    def _is_special_key(self, command: str) -> bool:
        """Return True for a three-character ``C-<key>`` string such as ``C-c``."""
        _command = command.strip()
        return _command.startswith('C-') and len(_command) == 3

    def _clear_screen(self) -> None:
        """Clear the tmux pane screen and history."""
        self.pane.send_keys('C-l', enter=False)
        time.sleep(0.1)
        self.pane.cmd('clear-history')

    def _get_command_output(
        self,
        command: str,
        raw_command_output: str,
        metadata: CmdOutputMetadata,
        continue_prefix: str = '',
    ) -> str:
        """Get the command output with the previous command output removed.

        Args:
            command: The command that was executed.
            raw_command_output: The raw output from the command.
            metadata: The metadata object to store prefix/suffix in.
            continue_prefix: The prefix to add to the command output if it's
                a continuation of the previous command.

        Returns:
            The new part of the output, without the echoed command and
            without trailing whitespace.
        """
        if self.prev_output:
            # Only report what was printed since the last observation.
            command_output = raw_command_output.removeprefix(self.prev_output)
            metadata.prefix = continue_prefix
        else:
            command_output = raw_command_output
        # Remember the full output for the next incremental call.
        self.prev_output = raw_command_output
        command_output = _remove_command_prefix(command_output, command)
        return command_output.rstrip()

    def _handle_completed_command(
        self, command: str, pane_content: str, ps1_matches: list[re.Match]
    ) -> CmdOutputObservation:
        """Build the observation for a command whose prompt has returned.

        Updates the tracked working directory from the last PS1 metadata
        block, marks the status COMPLETED, resets the incremental output
        buffer and clears the pane for the next command.
        """
        is_special_key = self._is_special_key(command)
        assert len(ps1_matches) >= 1, (
            f'Expected at least one PS1 metadata block, but got {len(ps1_matches)}.\n'
            f'---FULL OUTPUT---\n{pane_content!r}\n---END OF OUTPUT---'
        )
        metadata = CmdOutputMetadata.from_ps1_match(ps1_matches[-1])

        # With a single PS1 block the output is whatever precedes it; the
        # prefix set below tells the reader earlier output was truncated.
        get_content_before_last_match = bool(len(ps1_matches) == 1)

        # Track directory changes reported by the prompt metadata.
        if metadata.working_dir != self._cwd and metadata.working_dir:
            self._cwd = metadata.working_dir

        logger.debug(f'COMMAND OUTPUT: {pane_content}')

        raw_command_output = self._combine_outputs_between_matches(
            pane_content,
            ps1_matches,
            get_content_before_last_match=get_content_before_last_match,
        )

        if get_content_before_last_match:
            num_lines = len(raw_command_output.splitlines())
            metadata.prefix = f'[Previous command outputs are truncated. Showing the last {num_lines} lines of the output below.]\n'

        metadata.suffix = (
            f'\n[The command completed with exit code {metadata.exit_code}.]'
            if not is_special_key
            else f'\n[The command completed with exit code {metadata.exit_code}. CTRL+{command[-1].upper()} was sent.]'
        )
        command_output = self._get_command_output(
            command,
            raw_command_output,
            metadata,
        )
        self.prev_status = BashCommandStatus.COMPLETED
        self.prev_output = ''
        self._ready_for_next_command()
        return CmdOutputObservation(
            content=command_output,
            command=command,
            metadata=metadata,
        )

    def _handle_timeout_command(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
        status: BashCommandStatus,
        suffix: str,
    ) -> CmdOutputObservation:
        """Shared body of the two timeout handlers: record status, build observation."""
        self.prev_status = status
        if len(ps1_matches) != 1:
            logger.warning(
                'Expected exactly one PS1 metadata block BEFORE the execution of a command, '
                f'but got {len(ps1_matches)} PS1 metadata blocks:\n---\n{pane_content!r}\n---'
            )
        raw_command_output = self._combine_outputs_between_matches(
            pane_content, ps1_matches
        )
        metadata = CmdOutputMetadata()
        metadata.suffix = suffix
        command_output = self._get_command_output(
            command,
            raw_command_output,
            metadata,
            continue_prefix='[Below is the output of the previous command.]\n',
        )
        return CmdOutputObservation(
            content=command_output,
            command=command,
            metadata=metadata,
        )

    def _handle_nochange_timeout_command(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
    ) -> CmdOutputObservation:
        """Build the observation for a command whose output stopped changing."""
        suffix = (
            f'\n[The command has no new output after {self.NO_CHANGE_TIMEOUT_SECONDS} seconds. '
            "You may wait longer to see additional output by sending empty command '', "
            'send other commands to interact with the current process, '
            'or send keys to interrupt/kill the command.]'
        )
        return self._handle_timeout_command(
            command,
            pane_content,
            ps1_matches,
            BashCommandStatus.NO_CHANGE_TIMEOUT,
            suffix,
        )

    def _handle_hard_timeout_command(
        self,
        command: str,
        pane_content: str,
        ps1_matches: list[re.Match],
        timeout: float,
    ) -> CmdOutputObservation:
        """Build the observation for a command that exceeded the action timeout."""
        suffix = (
            f'\n[The command timed out after {timeout} seconds. '
            "You may wait longer to see additional output by sending empty command '', "
            'send other commands to interact with the current process, '
            'or send keys to interrupt/kill the command.]'
        )
        return self._handle_timeout_command(
            command,
            pane_content,
            ps1_matches,
            BashCommandStatus.HARD_TIMEOUT,
            suffix,
        )

    def _ready_for_next_command(self) -> None:
        """Reset the content buffer for a new command."""
        self._clear_screen()

    def _combine_outputs_between_matches(
        self,
        pane_content: str,
        ps1_matches: list[re.Match],
        get_content_before_last_match: bool = False,
    ) -> str:
        """Combine all outputs between PS1 matches.

        Args:
            pane_content: The full pane content containing PS1 prompts and
                command outputs.
            ps1_matches: List of regex matches for PS1 prompts.
            get_content_before_last_match: When there is exactly one PS1
                match, whether to return the content before it (True) or
                after it (False).

        Returns:
            The pane content itself when there are no matches; the text on
            the chosen side of a single match; otherwise the segments
            between consecutive matches (each followed by a newline) plus
            whatever follows the last match.
        """
        if len(ps1_matches) == 1:
            only_match = ps1_matches[0]
            if get_content_before_last_match:
                return pane_content[: only_match.start()]
            # "+ 1" skips the single character right after the PS1 block.
            return pane_content[only_match.end() + 1 :]
        if len(ps1_matches) == 0:
            return pane_content

        segments = [
            pane_content[current.end() + 1 : following.start()] + '\n'
            for current, following in zip(ps1_matches, ps1_matches[1:])
        ]
        # Text after the last prompt is the output of a still-running
        # command; dropping it would hide that output from the caller.
        segments.append(pane_content[ps1_matches[-1].end() + 1 :])
        combined_output = ''.join(segments)
        logger.debug(f'COMBINED OUTPUT: {combined_output}')
        return combined_output

    def execute(self, action: CmdRunAction) -> CmdOutputObservation | ErrorObservation:
        """Execute a command in the bash session.

        Args:
            action: Carries the command text, whether it is input for a
                running process (``is_input``), whether to ignore the
                no-change timeout (``blocking``) and an optional hard
                ``timeout`` in seconds.

        Returns:
            A ``CmdOutputObservation`` describing the completed, timed-out
            or rejected command, or an ``ErrorObservation`` when several
            commands were submitted at once.

        Raises:
            RuntimeError: If the session is not initialized, or if
                ``should_continue()`` turns false while polling.
        """
        if not self._initialized:
            raise RuntimeError('Bash session is not initialized')

        logger.debug(f'RECEIVED ACTION: {action}')
        command = action.command.strip()
        is_input: bool = action.is_input

        # Nothing is running: empty commands and process input make no sense.
        if self.prev_status not in {
            BashCommandStatus.CONTINUE,
            BashCommandStatus.NO_CHANGE_TIMEOUT,
            BashCommandStatus.HARD_TIMEOUT,
        }:
            if command == '':
                return CmdOutputObservation(
                    content='ERROR: No previous running command to retrieve logs from.',
                    command='',
                    metadata=CmdOutputMetadata(),
                )
            if is_input:
                return CmdOutputObservation(
                    content='ERROR: No previous running command to interact with.',
                    command='',
                    metadata=CmdOutputMetadata(),
                )

        # Only one top-level command may be submitted per action.
        splited_commands = split_bash_commands(command)
        if len(splited_commands) > 1:
            # Built outside the f-string: backslashes inside replacement
            # fields are a syntax error before Python 3.12.
            numbered_commands = '\n'.join(
                f'({index}) {cmd}'
                for index, cmd in enumerate(splited_commands, start=1)
            )
            return ErrorObservation(
                content=(
                    f'ERROR: Cannot execute multiple commands at once.\n'
                    f'Please run each command separately OR chain them into a single command via && or ;\n'
                    f'Provided commands:\n{numbered_commands}'
                )
            )

        start_time = time.time()
        last_change_time = start_time
        last_pane_output = self._get_pane_content()

        # A previous command timed out and its prompt has not come back yet:
        # refuse to type a brand-new command into the running process.
        # The completion test mirrors the one used in the polling loop below.
        if (
            self.prev_status
            in {
                BashCommandStatus.HARD_TIMEOUT,
                BashCommandStatus.NO_CHANGE_TIMEOUT,
            }
            and not last_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip())
            and not is_input
            and command != ''
        ):
            _ps1_matches = CmdOutputMetadata.matches_ps1_metadata(last_pane_output)
            raw_command_output = self._combine_outputs_between_matches(
                last_pane_output, _ps1_matches
            )
            metadata = CmdOutputMetadata()
            metadata.suffix = (
                f'\n[Your command "{command}" is NOT executed. '
                'The previous command is still running - You CANNOT send new commands until the previous command is completed. '
                'By setting `is_input` to `true`, you can interact with the current process: '
                "You may wait longer to see additional output of the previous command by sending empty command '', "
                'send other commands to interact with the current process, '
                'or send keys ("C-c", "C-z", "C-d") to interrupt/kill the previous command before sending your new command.]'
            )
            logger.debug(f'PREVIOUS COMMAND OUTPUT: {raw_command_output}')
            command_output = self._get_command_output(
                command,
                raw_command_output,
                metadata,
                continue_prefix='[Below is the output of the previous command.]\n',
            )
            return CmdOutputObservation(
                command=command,
                content=command_output,
                metadata=metadata,
            )

        # Type the command (or the input for the running process) into the
        # pane. Special keys such as C-c are sent without a trailing Enter.
        if command != '':
            is_special_key = self._is_special_key(command)
            if is_input:
                logger.debug(f'SENDING INPUT TO RUNNING PROCESS: {command!r}')
                self.pane.send_keys(
                    command,
                    enter=not is_special_key,
                )
            else:
                # From here on `command` is the escaped text that was typed.
                command = escape_bash_special_chars(command)
                logger.debug(f'SENDING COMMAND: {command!r}')
                self.pane.send_keys(
                    command,
                    enter=not is_special_key,
                )

        # Poll the pane until the prompt returns or a timeout fires.
        while should_continue():
            _start_time = time.time()
            logger.debug(f'GETTING PANE CONTENT at {_start_time}')
            cur_pane_output = self._get_pane_content()
            logger.debug(
                f'PANE CONTENT GOT after {time.time() - _start_time:.2f} seconds'
            )
            pane_lines = cur_pane_output.split('\n')
            logger.debug(f'BEGIN OF PANE CONTENT: {pane_lines[:10]}')
            logger.debug(f'END OF PANE CONTENT: {pane_lines[-10:]}')
            ps1_matches = CmdOutputMetadata.matches_ps1_metadata(cur_pane_output)
            if cur_pane_output != last_pane_output:
                last_pane_output = cur_pane_output
                last_change_time = time.time()
                logger.debug(f'CONTENT UPDATED DETECTED at {last_change_time}')

            # 1) Finished: the pane ends with the PS1 end marker.
            if cur_pane_output.rstrip().endswith(CMD_OUTPUT_PS1_END.rstrip()):
                return self._handle_completed_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                )

            # 2) No output change for too long (skipped for blocking actions).
            time_since_last_change = time.time() - last_change_time
            logger.debug(
                f'CHECKING NO CHANGE TIMEOUT ({self.NO_CHANGE_TIMEOUT_SECONDS}s): elapsed {time_since_last_change}. Action blocking: {action.blocking}'
            )
            if (
                not action.blocking
                and time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS
            ):
                return self._handle_nochange_timeout_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                )

            # 3) Hard timeout requested by the action.
            logger.debug(
                f'CHECKING HARD TIMEOUT ({action.timeout}s): elapsed {time.time() - start_time}'
            )
            if action.timeout and time.time() - start_time >= action.timeout:
                return self._handle_hard_timeout_command(
                    command,
                    pane_content=cur_pane_output,
                    ps1_matches=ps1_matches,
                    timeout=action.timeout,
                )

            logger.debug(f'SLEEPING for {self.POLL_INTERVAL} seconds for next poll')
            time.sleep(self.POLL_INTERVAL)
        raise RuntimeError('Bash session was likely interrupted...')
|
|
|