Backup-bdg's picture
Upload 964 files
51ff9e5 verified
raw
history blame
10.8 kB
"""This file contains the function calling implementation for different actions.
This is similar to the functionality of `CodeActResponseParser`.
"""
import json
from litellm import (
ModelResponse,
)
from openhands.agenthub.codeact_agent.tools import (
BrowserTool,
FinishTool,
IPythonTool,
LLMBasedFileEditTool,
ThinkTool,
create_cmd_run_tool,
create_str_replace_editor_tool,
)
from openhands.core.exceptions import (
FunctionCallNotExistsError,
FunctionCallValidationError,
)
from openhands.core.logger import openhands_logger as logger
from openhands.events.action import (
Action,
AgentDelegateAction,
AgentFinishAction,
AgentThinkAction,
BrowseInteractiveAction,
CmdRunAction,
FileEditAction,
FileReadAction,
IPythonRunCellAction,
MessageAction,
)
from openhands.events.action.mcp import MCPAction
from openhands.events.event import FileEditSource, FileReadSource
from openhands.events.tool import ToolCallMetadata
def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
return action
if thought and action.thought:
action.thought = f'{thought}\n{action.thought}'
elif thought:
action.thought = thought
return action
def response_to_actions(
response: ModelResponse, mcp_tool_names: list[str] | None = None
) -> list[Action]:
actions: list[Action] = []
assert len(response.choices) == 1, 'Only one choice is supported for now'
choice = response.choices[0]
assistant_msg = choice.message
if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls:
# Check if there's assistant_msg.content. If so, add it to the thought
thought = ''
if isinstance(assistant_msg.content, str):
thought = assistant_msg.content
elif isinstance(assistant_msg.content, list):
for msg in assistant_msg.content:
if msg['type'] == 'text':
thought += msg['text']
# Process each tool call to OpenHands action
for i, tool_call in enumerate(assistant_msg.tool_calls):
action: Action
logger.debug(f'Tool call in function_calling.py: {tool_call}')
try:
arguments = json.loads(tool_call.function.arguments)
except json.decoder.JSONDecodeError as e:
raise FunctionCallValidationError(
f'Failed to parse tool call arguments: {tool_call.function.arguments}'
) from e
# ================================================
# CmdRunTool (Bash)
# ================================================
if tool_call.function.name == create_cmd_run_tool()['function']['name']:
if 'command' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "command" in tool call {tool_call.function.name}'
)
# convert is_input to boolean
is_input = arguments.get('is_input', 'false') == 'true'
action = CmdRunAction(command=arguments['command'], is_input=is_input)
# Set hard timeout if provided
if 'timeout' in arguments:
try:
action.set_hard_timeout(float(arguments['timeout']))
except ValueError as e:
raise FunctionCallValidationError(
f"Invalid float passed to 'timeout' argument: {arguments['timeout']}"
) from e
# ================================================
# IPythonTool (Jupyter)
# ================================================
elif tool_call.function.name == IPythonTool['function']['name']:
if 'code' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "code" in tool call {tool_call.function.name}'
)
action = IPythonRunCellAction(code=arguments['code'])
elif tool_call.function.name == 'delegate_to_browsing_agent':
action = AgentDelegateAction(
agent='BrowsingAgent',
inputs=arguments,
)
# ================================================
# AgentFinishAction
# ================================================
elif tool_call.function.name == FinishTool['function']['name']:
action = AgentFinishAction(
final_thought=arguments.get('message', ''),
task_completed=arguments.get('task_completed', None),
)
# ================================================
# LLMBasedFileEditTool (LLM-based file editor, deprecated)
# ================================================
elif tool_call.function.name == LLMBasedFileEditTool['function']['name']:
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
if 'content' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "content" in tool call {tool_call.function.name}'
)
action = FileEditAction(
path=arguments['path'],
content=arguments['content'],
start=arguments.get('start', 1),
end=arguments.get('end', -1),
)
elif (
tool_call.function.name
== create_str_replace_editor_tool()['function']['name']
):
if 'command' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "command" in tool call {tool_call.function.name}'
)
if 'path' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "path" in tool call {tool_call.function.name}'
)
path = arguments['path']
command = arguments['command']
other_kwargs = {
k: v for k, v in arguments.items() if k not in ['command', 'path']
}
if command == 'view':
action = FileReadAction(
path=path,
impl_source=FileReadSource.OH_ACI,
view_range=other_kwargs.get('view_range', None),
)
else:
if 'view_range' in other_kwargs:
# Remove view_range from other_kwargs since it is not needed for FileEditAction
other_kwargs.pop('view_range')
# Filter out unexpected arguments
valid_kwargs = {}
# Get valid parameters from the str_replace_editor tool definition
str_replace_editor_tool = create_str_replace_editor_tool()
valid_params = set(
str_replace_editor_tool['function']['parameters'][
'properties'
].keys()
)
for key, value in other_kwargs.items():
if key in valid_params:
valid_kwargs[key] = value
else:
raise FunctionCallValidationError(
f'Unexpected argument {key} in tool call {tool_call.function.name}. Allowed arguments are: {valid_params}'
)
action = FileEditAction(
path=path,
command=command,
impl_source=FileEditSource.OH_ACI,
**valid_kwargs,
)
# ================================================
# AgentThinkAction
# ================================================
elif tool_call.function.name == ThinkTool['function']['name']:
action = AgentThinkAction(thought=arguments.get('thought', ''))
# ================================================
# BrowserTool
# ================================================
elif tool_call.function.name == BrowserTool['function']['name']:
if 'code' not in arguments:
raise FunctionCallValidationError(
f'Missing required argument "code" in tool call {tool_call.function.name}'
)
action = BrowseInteractiveAction(browser_actions=arguments['code'])
# ================================================
# MCPAction (MCP)
# ================================================
elif mcp_tool_names and tool_call.function.name in mcp_tool_names:
action = MCPAction(
name=tool_call.function.name,
arguments=arguments,
)
else:
raise FunctionCallNotExistsError(
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
)
# We only add thought to the first action
if i == 0:
action = combine_thought(action, thought)
# Add metadata for tool calling
action.tool_call_metadata = ToolCallMetadata(
tool_call_id=tool_call.id,
function_name=tool_call.function.name,
model_response=response,
total_calls_in_response=len(assistant_msg.tool_calls),
)
actions.append(action)
else:
actions.append(
MessageAction(
content=str(assistant_msg.content) if assistant_msg.content else '',
wait_for_response=True,
)
)
# Add response id to actions
# This will ensure we can match both actions without tool calls (e.g. MessageAction)
# and actions with tool calls (e.g. CmdRunAction, IPythonRunCellAction, etc.)
# with the token usage data
for action in actions:
action.response_id = response.id
assert len(actions) >= 1
return actions