# Browser tool setup: builds the BrowserGym high-level action set and defines the description strings for the browser tool.
from browsergym.core.action.highlevel import HighLevelActionSet from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk from openhands.llm.tool_names import BROWSER_TOOL_NAME # from browsergym/core/action/highlevel.py _browser_action_space = HighLevelActionSet( subsets=['bid', 'nav'], strict=False, # less strict on the parsing of the actions multiaction=True, # enable the agent to take multiple actions at once ) _BROWSER_DESCRIPTION = """Interact with the browser using Python code. Use it ONLY when you need to interact with a webpage. See the description of "code" parameter for more details. Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page. More than 2-3 actions usually leads to failure or unexpected behavior. Example: fill('a12', 'example with "quotes"') click('a51') click('48', button='middle', modifiers=['Shift']) You can also use the browser to view pdf, png, jpg files. You should first check the content of /tmp/oh-server-url to get the server url, and then use it to view the file by `goto("{server_url}/view?path={absolute_file_path}")`. For example: `goto("http://localhost:8000/view?path=/workspace/test_document.pdf")` Note: The file should be downloaded to the local machine first before using the browser to view it. """ _BROWSER_TOOL_DESCRIPTION = """ The following 15 functions are available. Nothing else is supported. goto(url: str) Description: Navigate to a url. Examples: goto('http://www.example.com') go_back() Description: Navigate to the previous page in history. Examples: go_back() go_forward() Description: Navigate to the next page in history. Examples: go_forward() noop(wait_ms: float = 1000) Description: Do nothing, and optionally wait for the given time (in milliseconds). You can use this to get the current page content and/or wait for the page to load. 
Examples: noop() noop(500) scroll(delta_x: float, delta_y: float) Description: Scroll horizontally and vertically. Amounts in pixels, positive for right or down scrolling, negative for left or up scrolling. Dispatches a wheel event. Examples: scroll(0, 200) scroll(-50.2, -100.5) fill(bid: str, value: str) Description: Fill out a form field. It focuses the element and triggers an input event with the entered text. It works for ,