# Spaces:
# Runtime error
# Runtime error
from chatarena.environments.base import Environment, TimeStep | |
from chatarena.message import Message, MessagePool | |
from typing import List, Dict, Union | |
from chatarena.agent import Player | |
from chatarena.backends import OpenAIChat | |
from chatarena.arena import Arena | |
from chatarena.utils import extract_code, extract_jsons | |
from io import StringIO | |
import sys | |
import traceback | |
class PythonREPL:
    """Simulates a standalone Python REPL.

    Every command is executed against the same ``globals`` dictionary, so
    names defined by one command stay visible to the commands that follow.
    """

    def __init__(self):
        # Namespace shared by every command run through this instance.
        self.globals = {}

    def run(self, command: str) -> str:
        """Execute ``command`` and return what it printed, or its traceback.

        Args:
            command: Python source code handed to ``exec``.

        Returns:
            The text written to ``sys.stdout`` while the command ran, or the
            formatted traceback if the command raised an ``Exception``.
        """
        # SECURITY: ``exec`` runs arbitrary code with the privileges of this
        # process. Only feed it trusted input or run the process sandboxed.
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, self.globals)
            output = mystdout.getvalue()
        except Exception:
            output = traceback.format_exc()
        finally:
            # Restore stdout on every exit path. ``except Exception`` does not
            # catch BaseException subclasses such as SystemExit or
            # KeyboardInterrupt, which would otherwise leave stdout redirected.
            sys.stdout = old_stdout
        return output
class IterativeCoding(Environment):
    """Environment in which a "coder" writes code and a "verifier" judges it.

    The game cycles through three phases stored in ``self.phase``:

    * ``"code"``    - the coder submits a first implementation,
    * ``"verify"``  - the verifier answers with a JSON verdict on the
      interpreter output,
    * ``"iterate"`` - the coder submits a revised implementation.

    Submitted code is executed with ``PythonREPL`` and the captured output is
    shown to the verifier. The game ends when the verifier reports
    ``"correct"`` or when a player's reply cannot be parsed.
    """

    type_name = "coding"

    def __init__(self, task: str = ""):
        """Create the environment for ``task`` and start the first episode.

        Args:
            task: Description of the coding task, embedded in the first
                moderator prompt.
        """
        super().__init__(player_names=["coder", "verifier"])

        self.task = task
        # The "state" of the environment is maintained by the message pool
        self.message_pool = MessagePool()
        self.phase = "code"  # "code", "verify", "iterate"
        self.python_repl = PythonREPL()
        # NOTE(review): max_turns is stored but never checked in this class.
        self.max_turns = 10
        self._terminal = False
        self.last_code = ""
        self.reset()

    def get_next_player(self) -> str:
        """Return the name of the player who acts in the current phase.

        Returns ``None`` if ``self.phase`` holds an unknown value.
        """
        speaker_by_phase = {
            "code": "coder",
            "iterate": "coder",
            "verify": "verifier",
        }
        return speaker_by_phase.get(self.phase)

    def _moderator_speak(self, text: str, visible_to: Union[str, List[str]] = "all"):
        """Append a message from the "Moderator" to the message pool.

        Args:
            text: Content of the moderator message.
            visible_to: Player name(s) allowed to see the message, or "all".
        """
        message = Message(agent_name="Moderator", content=text, turn=self.turn, visible_to=visible_to)
        self.message_pool.append_message(message)

    def reset(self):
        """Start a new episode and return its first ``TimeStep``.

        All per-episode state is restored: turn counter, phase, last
        submitted code, interpreter namespace, terminal flag and messages.
        Without resetting the phase, an environment reset after a finished
        game would address the opening prompt to the coder while
        ``get_next_player`` still pointed at the verifier.
        """
        self.turn = 0
        self.phase = "code"
        self.last_code = ""
        # Fresh interpreter so definitions from a previous episode cannot leak.
        self.python_repl = PythonREPL()
        self._terminal = False
        self.message_pool.reset()
        self._moderator_speak(f"For the following task \n ```{self.task}```. "
                              f"\n Write some testcases and then an actual function that implement the task. Everything should be in a single code block", visible_to="coder")
        observation = self.get_observation(self.get_next_player())
        self.turn += 1
        return TimeStep(observation=observation, reward=self.get_zero_rewards(), terminal=self._terminal)

    def get_observation(self, player_name=None) -> List[Message]:
        """Return the messages observable by ``player_name``.

        With ``player_name=None`` every message in the pool is returned;
        otherwise the pool is asked for the messages visible to that player,
        passing ``turn=self.turn + 1``.
        """
        if player_name is None:
            return self.message_pool.get_all_messages()
        return self.message_pool.get_visible_messages(player_name, turn=self.turn + 1)

    def process_broken(self):
        """Announce that the process is broken and return a terminal step."""
        self._moderator_speak("The process is broken. Please restart the game.")
        self._terminal = True
        observation = self.get_observation(self.get_next_player())
        return TimeStep(observation=observation, reward=self.get_zero_rewards(), terminal=self._terminal)

    def step(self, player_name: str, action: str) -> TimeStep:
        """Record ``action`` from ``player_name`` and advance the game.

        Args:
            player_name: Must equal ``get_next_player()``.
            action: Raw text reply of the player.

        Returns:
            The ``TimeStep`` for the next player. It is terminal when the
            verifier reports "correct" (with one-rewards) or when the reply
            could not be parsed (with zero-rewards).

        Raises:
            AssertionError: If it is not ``player_name``'s turn.
        """
        assert player_name == self.get_next_player(), f"Wrong player! It is {self.get_next_player()} turn."
        visible_to = "all"
        message = Message(agent_name=player_name, content=action, turn=self.turn, visible_to=visible_to)
        self.message_pool.append_message(message)

        if self.phase in ("iterate", "code"):
            # The coder must supply exactly one code block.
            code_list = extract_code(action)
            if len(code_list) != 1:
                return self.process_broken()
            self.last_code = code_list[0]
            interpreter_output = self.python_repl.run(self.last_code)
            self.phase = "verify"
            self._moderator_speak(f"Here's the outputs: {interpreter_output}. Is the code correct? Output with json format.",
                                  visible_to="verifier")
        elif self.phase == "verify":
            # The verifier must supply exactly one JSON verdict.
            json_list = extract_jsons(action)
            if len(json_list) != 1:
                return self.process_broken()
            verdict = json_list[0]
            # A verdict without a "result" field is as unusable as no JSON at
            # all; treat it as a broken process instead of raising KeyError.
            if not isinstance(verdict, dict) or "result" not in verdict:
                return self.process_broken()
            if verdict["result"] == "correct":
                self._terminal = True
                self._moderator_speak(f"Tests passed! Here's the code: \n ```{self.last_code}```")
                return TimeStep(observation=self.get_observation(self.get_next_player()),
                                reward=self.get_one_rewards(),
                                terminal=True)
            self.phase = "iterate"
            self._moderator_speak("Now iterate your code with feedbacks. First think about why and then write the new code.", visible_to="coder")

        self.turn += 1
        return TimeStep(observation=self.get_observation(self.get_next_player()),
                        reward=self.get_zero_rewards(),
                        terminal=self._terminal)
if __name__ == "__main__":
    # Demo entry point: one coder and one verifier, each backed by its own
    # OpenAIChat instance, play the iterative-coding game in the CLI.

    # Prompt given to the player that writes the code.
    coder_role_description = """
You are a coder. You are going to follow a workflow of coding to implement a specific function.
Your implementation will be tested by the verifier. If the implementation is wrong, you will try output new implementation given the feedback.
Your output can include your reasoning process but the code part should always be surrounded by triple backticks.
"""

    # Prompt given to the player that judges the interpreter output.
    verifier_role_description = """
You are a verifier. You are going to verify if the code is correct or not according to the interpretor outputs.
You should always output a json with following format:
{
"outputs_extraction": the outputs from the interpreter output showing the error or correctness of the code,
"result": "correct" or "incorrect",
}
"""

    # The coding task handed to the environment.
    task = """
Write a python function for detecting if there's a json within a bunch of text.
The input of this function is a string, and the output is a boolean.
If there are multiple jsons in the string, return True if any of them is valid.
"""

    # Build the players in the order the environment names them; every
    # player receives a separate backend object.
    role_descriptions = (
        ("coder", coder_role_description),
        ("verifier", verifier_role_description),
    )
    players = [
        Player(role_name, role_desc=role_text,
               backend=OpenAIChat(max_tokens=1024, model="gpt-4"))
        for role_name, role_text in role_descriptions
    ]

    arena = Arena(players, IterativeCoding(task=task))
    arena.launch_cli()