from chatarena.environments.base import Environment, TimeStep from chatarena.message import Message, MessagePool from typing import List, Dict, Union from chatarena.agent import Player from chatarena.backends import OpenAIChat from chatarena.arena import Arena from chatarena.utils import extract_code, extract_jsons from io import StringIO import sys import traceback class PythonREPL: """Simulates a standalone Python REPL.""" def __init__(self): self.globals = {} def run(self, command: str) -> str: old_stdout = sys.stdout sys.stdout = mystdout = StringIO() try: exec(command, self.globals) sys.stdout = old_stdout output = mystdout.getvalue() except Exception: sys.stdout = old_stdout output = traceback.format_exc() return output class IterativeCoding(Environment): type_name = "coding" def __init__(self, task:str=""): super().__init__(player_names=["coder", "verifier"]) self.task = task # The "state" of the environment is maintained by the message pool self.message_pool = MessagePool() self.phase = "code" # "code", "verify", "iterate" self.python_repl = PythonREPL() self.max_turns = 10 self._terminal = False self.reset() self.last_code = "" def get_next_player(self) -> str: if self.phase == "code": return "coder" elif self.phase == "iterate": return "coder" elif self.phase == "verify": return "verifier" def _moderator_speak(self, text: str, visible_to: Union[str, List[str]] = "all"): """ moderator say something """ message = Message(agent_name="Moderator", content=text, turn=self.turn, visible_to=visible_to) self.message_pool.append_message(message) def reset(self): self.turn = 0 self.message_pool.reset() self._moderator_speak(f"For the following task \n ```{self.task}```. " f"\n Write some testcases and then an actual function that implement the task. Everything should be in a single code block", visible_to="coder") observation = self.get_observation(self.get_next_player()) self._terminal = False self.turn += 1 return TimeStep(observation=observation, reward=self.get_zero_rewards(), terminal=self._terminal) def get_observation(self, player_name=None) -> List[Message]: if player_name is None: return self.message_pool.get_all_messages() else: return self.message_pool.get_visible_messages(player_name, turn=self.turn + 1) def process_broken(self): self._moderator_speak(f"The process is broken. Please restart the game.") self._terminal = True observation = self.get_observation(self.get_next_player()) return TimeStep(observation=observation, reward=self.get_zero_rewards(), terminal=self._terminal) def step(self, player_name: str, action: str) -> TimeStep: assert player_name == self.get_next_player(), f"Wrong player! It is {self.get_next_player()} turn." visible_to = "all" message = Message(agent_name=player_name, content=action, turn=self.turn, visible_to=visible_to) self.message_pool.append_message(message) if self.phase in ["iterate", "code"]: code_list = extract_code(action) if len(code_list) != 1: return self.process_broken() self.last_code = code_list[0] interpreter_output = self.python_repl.run(code_list[0]) self.phase = "verify" elif self.phase == "verify": json_list = extract_jsons(action) if len(json_list) != 1: return self.process_broken() if json_list[0]["result"] == "correct": self._terminal = True self._moderator_speak(f"Tests passed! Here's the code: \n ```{self.last_code}```") return TimeStep(observation=self.get_observation(self.get_next_player()), reward=self.get_one_rewards(), terminal=True) self.phase = "iterate" if self.phase == "verify": self._moderator_speak(f"Here's the outputs: {interpreter_output}. Is the code correct? Output with json format.", visible_to="verifier") elif self.phase == "iterate": self._moderator_speak(f"Now iterate your code with feedbacks. First think about why and then write the new code.", visible_to="coder") self.turn += 1 return TimeStep(observation=self.get_observation(self.get_next_player()), reward=self.get_zero_rewards(), terminal=self._terminal) if __name__ == "__main__": coder_role_description = """ You are a coder. You are going to follow a workflow of coding to implement a specific function. Your implementation will be tested by the verifier. If the implementation is wrong, you will try output new implementation given the feedback. Your output can include your reasoning process but the code part should always be surrounded by triple backticks. """ verifier_role_description = """ You are a verifier. You are going to verify if the code is correct or not according to the interpretor outputs. You should always output a json with following format: { "outputs_extraction": the outputs from the interpreter output showing the error or correctness of the code, "result": "correct" or "incorrect", } """ task = """ Write a python function for detecting if there's a json within a bunch of text. The input of this function is a string, and the output is a boolean. If there are multiple jsons in the string, return True if any of them is valid. """ coder = Player("coder", role_desc=coder_role_description, backend=OpenAIChat(max_tokens=1024, model="gpt-4")) verifier = Player("verifier", role_desc=verifier_role_description, backend=OpenAIChat(max_tokens=1024, model="gpt-4")) env = IterativeCoding(task=task) arena = Arena([coder, verifier], env) arena.launch_cli()