Spaces:

Backup-bdg
/

OpenHands

Build error

App Files Files Community

OpenHands / evaluation /benchmarks /mint /env.py

Backup-bdg

Upload 964 files

51ff9e5 verified 7 days ago

raw

history blame

4.76 kB

	import re
	import traceback

	from datatypes import ParseError, StepOutput, TaskState
	from tasks.base import Task

	from openhands.controller.state.state import State


	class SimplifiedEnv:
	INVALID_INPUT_MESSAGE = (
	"I don't understand your input. \n"
	'If you want to execute code, please use <execute_ipython> YOUR_CODE_HERE </execute_ipython>.\n'
	'If you want to give me an answer, please use <solution> YOUR_SOLUTION_HERE </solution>.\n'
	'For example: The answer to the question is <solution> 42 </solution>. \n'
	)

	def __init__(self, agent_state: State, task: Task, task_config: dict[str, int]):
	self.agent_state = agent_state
	self.task = task

	agent_action_count = {
	'propose_solution': 0,
	'use_tool': 0,
	'invalid_action': 0,
	}
	# check if agent_state has attribute turn_info set
	if hasattr(self.agent_state, 'propose_solution_count'):
	agent_action_count['propose_solution'] = (
	self.agent_state.propose_solution_count
	)

	self.task_state = TaskState(agent_action_count=agent_action_count)

	self.task_config = task_config

	def step(self, lm_message: str):
	observation = self.handle_propose_solution(lm_message)

	self.check_max_iteration()

	turn_info = (
	self.task_config['max_iterations'] - self.agent_state.iteration,
	self.task_config['max_propose_solution']
	- self.task_state.agent_action_count['propose_solution'],
	)

	output = StepOutput(
	observation=observation,
	success=self.task_state.success,
	turn_info=turn_info,
	)

	self.agent_state.propose_solution_count = self.task_state.agent_action_count[
	'propose_solution'
	]
	self.log_output(output)
	return self.task_state

	def handle_propose_solution(self, lm_message) -> str \| None:
	"""Propose answer to check the task success.

	It might set self.state.finished = True if the task is successful.
	"""
	self.task_state.agent_action_count['propose_solution'] += 1
	try:
	parsed = self.parse_propose_solution(lm_message)
	task_success = self.check_task_success(parsed['answer'])
	if task_success:
	self.task_state.finished = True
	self.task_state.success = True
	self.task_state.terminate_reason = 'task_success'
	# NOTE: should not return the function now, because we need to log the output
	# Set state.finished = True will terminate the episode
	except ParseError:
	return SimplifiedEnv.INVALID_INPUT_MESSAGE
	except Exception:
	error_traceback = traceback.format_exc()
	return f'{error_traceback}'

	def parse_propose_solution(self, lm_message: str) -> dict:
	"""Define the parsing logic."""
	lm_output = '\n' + lm_message + '\n'

	answer = '\n'.join(
	[
	i.strip()
	for i in re.findall(r'<solution>(.*?)</solution>', lm_output, re.DOTALL)
	]
	)
	if answer == '':
	raise ParseError('No answer found.')

	return {'answer': answer}

	def log_output(self, output: StepOutput) -> None:
	if self.task_state.finished:
	return

	content = output.to_str()
	self.task_state.latest_output = output.to_dict()
	self.task_state.latest_output['content'] = content

	def check_task_success(self, answer: str) -> bool:
	# log_message.info(f"STUDENT ANSWER: [{answer}]")
	# log_message.info(f"REFERENCE ANSWER: [{self.task.reference}]")
	return self.task.success(answer)

	def check_max_iteration(self):
	"""Check if the agent has reached the max iteration limit.

	It might set self.state.finished = True if the agent has reached the max iteration limit.
	"""
	if self.task_state.finished:
	# ignore if the episode is already finished (e.g., task success)
	return

	if (
	# propose solution > max output solution
	self.task_state.agent_action_count['propose_solution']
	>= self.task_config['max_propose_solution']
	):
	self.task_state.finished = True
	self.task_state.success = False
	self.task_state.terminate_reason = 'max_propose_steps'
	elif self.agent_state.iteration >= self.task_config['max_iterations']:
	self.task_state.finished = True
	self.task_state.success = False
	self.task_state.terminate_reason = 'max_iterations'