Spaces:
Runtime error
Runtime error
hyyh28
committed on
Commit
·
1aa4792
1
Parent(s):
eb1b37d
update atari_env
Browse filesupdate atari_env, also make a test_atari.sh
- deciders/parser.py +20 -1
- envs/__init__.py +44 -1
- envs/atari/Boxing_policies.py +161 -0
- envs/atari/Boxing_translator.py +99 -0
- envs/atari/Pong_policies.py +65 -0
- envs/atari/Pong_translator.py +67 -0
- envs/atari/__init__.py +1 -0
- envs/atari/represented_atari_game.py +221 -0
- record_reflexion.csv +2 -0
- test_atari.sh +1 -0
deciders/parser.py
CHANGED
@@ -40,6 +40,25 @@ class SixAction(BaseModel):
|
|
40 |
if field not in [1, 2, 3, 4, 5, 6]:
|
41 |
raise ValueError("Action is not valid ([1, 2, 3, 4, 5, 6])!")
|
42 |
return field
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
class ContinuousAction(BaseModel):
|
45 |
action: float = Field(description="the choosed action to perform")
|
@@ -50,4 +69,4 @@ class ContinuousAction(BaseModel):
|
|
50 |
raise ValueError("Action is not valid ([-1,1])!")
|
51 |
return field
|
52 |
|
53 |
-
PARSERS = {1:ContinuousAction, 2: TwoAction, 3: ThreeAction, 4: FourAction, 6: SixAction}
|
|
|
40 |
if field not in [1, 2, 3, 4, 5, 6]:
|
41 |
raise ValueError("Action is not valid ([1, 2, 3, 4, 5, 6])!")
|
42 |
return field
|
43 |
+
|
44 |
+
|
45 |
+
class NineAction(BaseModel):
|
46 |
+
action: int = Field(description="the choosed action to perform")
|
47 |
+
|
48 |
+
# You can add custom validation logic easily with Pydantic.
|
49 |
+
@validator('action')
|
50 |
+
def action_is_valid(cls, field):
|
51 |
+
if field not in [1, 2, 3, 4, 5, 6, 7, 8, 9]:
|
52 |
+
raise ValueError("Action is not valid ([1, 2, 3, 4, 5, 6, 7, 8, 9])!")
|
53 |
+
return field
|
54 |
+
|
55 |
+
class FullAtariAction(BaseModel):
|
56 |
+
action: int = Field(description="the choosed action to perform")
|
57 |
+
@validator('action')
|
58 |
+
def action_is_valid(cls, info):
|
59 |
+
if info not in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]:
|
60 |
+
raise ValueError("Action is not valid ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])!")
|
61 |
+
return info
|
62 |
|
63 |
class ContinuousAction(BaseModel):
|
64 |
action: float = Field(description="the choosed action to perform")
|
|
|
69 |
raise ValueError("Action is not valid ([-1,1])!")
|
70 |
return field
|
71 |
|
72 |
+
PARSERS = {1:ContinuousAction, 2: TwoAction, 3: ThreeAction, 4: FourAction, 6: SixAction, 9:NineAction, 18: FullAtariAction}
|
envs/__init__.py
CHANGED
@@ -10,6 +10,10 @@ from .toy_text import blackjack_translator, blackjack_policies
|
|
10 |
from .toy_text import taxi_translator, taxi_policies
|
11 |
from .toy_text import cliffwalking_translator, cliffwalking_policies
|
12 |
from .toy_text import frozenlake_translator, frozenlake_policies
|
|
|
|
|
|
|
|
|
13 |
|
14 |
REGISTRY = {}
|
15 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
@@ -48,4 +52,43 @@ REGISTRY["frozenlake_policies"] = [frozenlake_policies.dedicated_1_policy, froze
|
|
48 |
|
49 |
REGISTRY["mountaincarContinuous_init_translator"] = mountaincarContinuous_translator.GameDescriber
|
50 |
REGISTRY["mountaincarContinuous_basic_translator"] = mountaincarContinuous_translator.BasicStateSequenceTranslator
|
51 |
-
REGISTRY["mountaincarContinuous_policies"] = [mountaincarContinuous_policies.pseudo_random_policy, mountaincarContinuous_policies.real_random_policy]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from .toy_text import taxi_translator, taxi_policies
|
11 |
from .toy_text import cliffwalking_translator, cliffwalking_policies
|
12 |
from .toy_text import frozenlake_translator, frozenlake_policies
|
13 |
+
from .atari import register_environments
|
14 |
+
from .atari import Boxing_policies, Boxing_translator, Pong_policies, Pong_translator
|
15 |
+
|
16 |
+
register_environments()
|
17 |
|
18 |
REGISTRY = {}
|
19 |
REGISTRY["sampling_wrapper"] = SettableStateEnv
|
|
|
52 |
|
53 |
REGISTRY["mountaincarContinuous_init_translator"] = mountaincarContinuous_translator.GameDescriber
|
54 |
REGISTRY["mountaincarContinuous_basic_translator"] = mountaincarContinuous_translator.BasicStateSequenceTranslator
|
55 |
+
REGISTRY["mountaincarContinuous_policies"] = [mountaincarContinuous_policies.pseudo_random_policy, mountaincarContinuous_policies.real_random_policy]
|
56 |
+
|
57 |
+
|
58 |
+
REGISTRY["RepresentedBoxing_init_translator"] = Boxing_translator.GameDescriber
|
59 |
+
REGISTRY["RepresentedBoxing_basic_translator"] = Boxing_translator.BasicStateSequenceTranslator
|
60 |
+
REGISTRY["RepresentedBoxing_basic_policies"] = [
|
61 |
+
Boxing_policies.real_random_policy,
|
62 |
+
Boxing_policies.pseudo_random_policy,
|
63 |
+
Boxing_policies.dedicated_1_policy,
|
64 |
+
Boxing_policies.dedicated_2_policy,
|
65 |
+
Boxing_policies.dedicated_3_policy,
|
66 |
+
Boxing_policies.dedicated_4_policy,
|
67 |
+
Boxing_policies.dedicated_5_policy,
|
68 |
+
Boxing_policies.dedicated_6_policy,
|
69 |
+
Boxing_policies.dedicated_7_policy,
|
70 |
+
Boxing_policies.dedicated_8_policy,
|
71 |
+
Boxing_policies.dedicated_9_policy,
|
72 |
+
Boxing_policies.dedicated_10_policy,
|
73 |
+
Boxing_policies.dedicated_11_policy,
|
74 |
+
Boxing_policies.dedicated_12_policy,
|
75 |
+
Boxing_policies.dedicated_13_policy,
|
76 |
+
Boxing_policies.dedicated_14_policy,
|
77 |
+
Boxing_policies.dedicated_15_policy,
|
78 |
+
Boxing_policies.dedicated_16_policy,
|
79 |
+
Boxing_policies.dedicated_17_policy,
|
80 |
+
Boxing_policies.dedicated_18_policy
|
81 |
+
]
|
82 |
+
|
83 |
+
REGISTRY["RepresentedPong_init_translator"] = Pong_translator.GameDescriber
|
84 |
+
REGISTRY["RepresentedPong_basic_translator"] = Pong_translator.BasicStateSequenceTranslator
|
85 |
+
REGISTRY["RepresentedPong_basic_policies"] = [
|
86 |
+
Pong_policies.real_random_policy,
|
87 |
+
Pong_policies.pseudo_random_policy,
|
88 |
+
Pong_policies.dedicated_1_policy,
|
89 |
+
Pong_policies.dedicated_2_policy,
|
90 |
+
Pong_policies.dedicated_3_policy,
|
91 |
+
Pong_policies.dedicated_4_policy,
|
92 |
+
Pong_policies.dedicated_5_policy,
|
93 |
+
Pong_policies.dedicated_6_policy,
|
94 |
+
]
|
envs/atari/Boxing_policies.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def dedicated_1_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 1 (NOOP)."""
    def get_description():
        return "Always select action 1 which does NOOP (no operation)"

    dedicated_1_policy.description = get_description()
    return 1


def dedicated_2_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 2 (hit the enemy)."""
    def get_description():
        return "Always select action 2 which hits the enemy"

    # Bug fix: the description was previously attached to dedicated_1_policy,
    # overwriting that policy's text and leaving this one undocumented.
    dedicated_2_policy.description = get_description()
    return 2
|
18 |
+
|
19 |
+
|
20 |
+
def dedicated_3_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 3 (move up)."""
    dedicated_3_policy.description = "Always select action 3 which moves the agent up"
    return 3


def dedicated_4_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 4 (move right)."""
    dedicated_4_policy.description = "Always select action 4 which moves the agent right"
    return 4


def dedicated_5_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 5 (move left)."""
    dedicated_5_policy.description = "Always select action 5 which moves the agent left"
    return 5
|
42 |
+
|
43 |
+
|
44 |
+
def pseudo_random_policy(state, pre_action):
    """Deterministically cycle through Boxing actions 1..18, advancing one step
    from the previously taken action."""
    pseudo_random_policy.description = "Select an action among 1 to 18 alternatively"
    return pre_action % 18 + 1


def real_random_policy(state, pre_action=1):
    """Pick one of the 18 Boxing actions uniformly at random."""
    real_random_policy.description = "Select action with a random policy"
    return np.random.choice(range(0, 18)) + 1
|
57 |
+
|
58 |
+
|
59 |
+
# Complete set of dedicated action policies
|
60 |
+
def dedicated_6_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 6 (move down)."""
    dedicated_6_policy.description = "Always select action 6 which moves the agent down"
    return 6


def dedicated_7_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 7 (move up-right)."""
    dedicated_7_policy.description = "Always select action 7 which moves the agent up and to the right"
    return 7
|
74 |
+
|
75 |
+
|
76 |
+
def dedicated_8_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 8 (move up-left)."""
    def get_description():
        return "Always select action 8 which moves the agent up and to the left"

    # Bug fix: was `dedicated_8.description`, a NameError on first call
    # (no object named `dedicated_8` exists).
    dedicated_8_policy.description = get_description()
    return 8


def dedicated_9_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 9 (move down-right)."""
    def get_description():
        return "Always select action 9 which moves the agent down and to the right"

    # Bug fix: was `dedicated_9.description`, a NameError on first call.
    dedicated_9_policy.description = get_description()
    return 9
|
90 |
+
|
91 |
+
|
92 |
+
def dedicated_10_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 10 (move down-left)."""
    def get_description():
        return "Always select action 10 which moves the agent down and to the left"

    dedicated_10_policy.description = get_description()
    return 10


def dedicated_11_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 11 (hit while moving up)."""
    def get_description():
        # Typo fix: "hiting" -> "hitting" in the user-facing description.
        return "Always select action 11 which moves the agent up while hitting the enemy"

    dedicated_11_policy.description = get_description()
    return 11


def dedicated_12_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 12 (hit while moving right)."""
    def get_description():
        return "Always select action 12 which moves the agent right while hitting the enemy"

    dedicated_12_policy.description = get_description()
    return 12


def dedicated_13_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 13 (hit while moving left)."""
    def get_description():
        return "Always select action 13 which moves the agent left while hitting the enemy"

    dedicated_13_policy.description = get_description()
    return 13


def dedicated_14_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 14 (hit while moving down)."""
    def get_description():
        return "Always select action 14 which moves the agent down while hitting the enemy"

    dedicated_14_policy.description = get_description()
    return 14


def dedicated_15_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 15 (hit while moving up-right)."""
    def get_description():
        return "Always select action 15 which moves the agent up and to the right while hitting the enemy"

    dedicated_15_policy.description = get_description()
    return 15


def dedicated_16_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 16 (hit while moving up-left)."""
    def get_description():
        return "Always select action 16 which moves the agent up and to the left while hitting the enemy"

    dedicated_16_policy.description = get_description()
    return 16


def dedicated_17_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 17 (hit while moving down-right)."""
    def get_description():
        return "Always select action 17 which moves the agent down and to the right while hitting the enemy"

    dedicated_17_policy.description = get_description()
    return 17


def dedicated_18_policy(state, pre_action=1):
    """Fixed Boxing policy: always return action 18 (hit while moving down-left)."""
    def get_description():
        return "Always select action 18 which moves the agent down and to the left while hitting the enemy"

    dedicated_18_policy.description = get_description()
    return 18
|
envs/atari/Boxing_translator.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# [Translator classes and functions for Atari Boxing environment]
|
2 |
+
|
3 |
+
class BasicLevelTranslator:
    """Translate a raw Boxing label vector into a natural-language description."""

    def __init__(self,):
        pass

    def translate(self, state):
        """Describe positions and scores from a 7-element Boxing label state."""
        player_x, player_y, enemy_x, enemy_y, enemy_score, clock, player_score = state
        # Bug fix: the original interpolated `{player_x, player_y}` (a tuple
        # expression) inside already-open parentheses, producing unbalanced
        # text like "((12, 34)"; it also misspelled "opponent's".
        return f"The player is at position ({player_x}, {player_y}), your opponent is at position ({enemy_x}, {enemy_y}), " \
               f"your opponent's score is {enemy_score}, your score is {player_score}. Move left and right will change the player_x while move up and down will change the player_y"
|
11 |
+
|
12 |
+
|
13 |
+
class GameDescriber:
    """Static, state-independent prompt text for the Atari Boxing game."""

    def __init__(self, args):
        # `args` is the experiment argument namespace; only these two fields
        # are read here.
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
        }
        self.reward_desc_dict = {
        }

    def describe_goal(self):
        """One-sentence objective of the game."""
        return "The goal is to knock out your opponent."

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        # No terminal-state text is provided for this game.
        return ""

    def translate_potential_next_state(self, state, action):
        # No lookahead text is provided for this game.
        return ""

    def describe_game(self):
        """Rules summary shown to the decider."""
        # Typo fixes: "knocked out.Scoring" and "caughton" were missing spaces.
        return "In the Boxing game, you fight an opponent in a boxing ring. You score points for hitting the opponent. " \
               "If you score 100 points, your opponent is knocked out. Scoring Points: When you get near enough to your opponent to throw a punch, " \
               "press the red button. Each punch moves your opponent slightly back and away from the punch." \
               " If you move him to the ropes, he can't easily duck the next punch, " \
               "and you can set up a real scoring barrage. But don't get caught on the ropes yourself! " \
               "Watch your distance. If you move in too close, the computer gets tougher;" \
               " but if you're too far away, you can't land scoring punches. "

    def describe_action(self):
        """Enumerate the 18 legal Boxing actions for the decider."""
        return "Your Next Move: \n Please choose an action. Type '1' for NOOP (no operation), '2' to hit your opponent, " \
               "'3' to move up, '4' to move right, '5' to move left, '6' to move down, '7' to move up-right, " \
               "'8' to move up-left, '9' to move down-right, '10' to move down-left, '11' to hit your opponent and move up, " \
               "'12' to hit your opponent and move right, '13' to hit your opponent and move left, '14' to hit your opponent and move down, " \
               "'15' to hit your opponent and move up-right, '16' to hit your opponent and move up-left, '17' to hit your opponent and move down-right, " \
               "or '18' to hit your opponent and move down-left. Ensure you only provide the action number " \
               "from the valid action list, i.e., [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]."
|
48 |
+
|
49 |
+
|
50 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Translate a sequence of Boxing transitions into text descriptions."""

    # Action id -> human-readable description. Replaces the original 18-branch
    # if/elif chain; also fixes the "up-lefr" typo for action 8.
    _ACTION_DESC = {
        1: "Do nothing",
        2: "Hit your opponent",
        3: "Move up",
        4: "Move right",
        5: "Move left",
        6: "Move down",
        7: "Move up-right",
        8: "Move up-left",
        9: "Move down-right",
        10: "Move down-left",
        11: "Hit your opponent and move up",
        12: "Hit your opponent and move right",
        13: "Hit your opponent and move left",
        14: "Hit your opponent and move down",
        15: "Hit your opponent and move up-right",
        16: "Hit your opponent and move up-left",
        17: "Hit your opponent and move down-right",
        18: "Hit your opponent and move down-left",
    }

    def translate(self, infos, is_current=False):
        """Describe each (state, action, reward, next_state) transition.

        When `is_current` is True, only the latest state is described and a
        single string is returned; otherwise a list of transition strings.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])
        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"
            state_desc = BasicLevelTranslator().translate(info['state'])
            # Unknown ids fall back to action 18's text, matching the original
            # chain's final `else` branch.
            desc = self._ACTION_DESC.get(info['action'], self._ACTION_DESC[18])
            action_desc = f"Take Action: '{desc}'"
            reward_desc = f"Result: Reward of {info['reward']}, "
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            descriptions.append(f"{state_desc}.\n {action_desc} \n {reward_desc} \n Transit to {next_state_desc}")
        return descriptions
|
envs/atari/Pong_policies.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def dedicated_1_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 1 (NOOP)."""
    def get_description():
        return "Always select action 1 which does NOOP (no operation)"

    dedicated_1_policy.description = get_description()
    return 1


def dedicated_2_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 2 (hit the ball)."""
    def get_description():
        return "Always select action 2 which hits the ball"

    # Bug fix: the description was previously attached to dedicated_1_policy,
    # overwriting that policy's text and leaving this one undocumented.
    dedicated_2_policy.description = get_description()
    return 2
18 |
+
|
19 |
+
|
20 |
+
def dedicated_3_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 3 (move right)."""
    def get_description():
        return "Always select action 3 which moves the agent right"

    dedicated_3_policy.description = get_description()
    return 3


def dedicated_4_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 4 (move left)."""
    def get_description():
        return "Always select action 4 which moves the agent left"

    dedicated_4_policy.description = get_description()
    return 4


def dedicated_5_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 5 (move right while hitting)."""
    def get_description():
        # Typo fix: "hiting" -> "hitting" in the user-facing description.
        return "Always select action 5 which moves the agent right while hitting the ball"

    dedicated_5_policy.description = get_description()
    return 5
|
42 |
+
|
43 |
+
|
44 |
+
def pseudo_random_policy(state, pre_action):
    """Deterministically cycle through Pong actions 1..6, advancing one step
    from the previously taken action."""
    pseudo_random_policy.description = "Select an action among 1 to 6 alternatively"
    return pre_action % 6 + 1


def real_random_policy(state, pre_action=1):
    """Pick one of the 6 Pong actions uniformly at random."""
    real_random_policy.description = "Select action with a random policy"
    return np.random.choice(range(0, 6)) + 1
|
57 |
+
|
58 |
+
|
59 |
+
# Complete set of dedicated action policies
|
60 |
+
def dedicated_6_policy(state, pre_action=1):
    """Fixed Pong policy: always return action 6 (move left while hitting)."""
    def get_description():
        # Bug fix: the description said "action 5" although the policy
        # returns 6; also fixes the "hiting" typo.
        return "Always select action 6 which moves the agent left while hitting the ball"

    dedicated_6_policy.description = get_description()
    return 6
|
envs/atari/Pong_translator.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# [Translator classes and functions for Atari Pong environment]
|
2 |
+
#'labels': {'player_y': 109, 'player_x': 188, 'enemy_y': 20, 'enemy_x': 64, 'ball_x': 0, 'ball_y': 0, 'enemy_score': 0, 'player_score': 0}
|
3 |
+
class BasicLevelTranslator:
    """Translate a raw Pong label vector into a natural-language description."""

    def __init__(self, ):
        pass

    def translate(self, state):
        """Describe paddle/ball positions and scores from an 8-element state."""
        player_y, player_x, enemy_y, enemy_x, ball_x, ball_y, enemy_score, player_score = state
        # Bug fixes: tuple expressions inside already-open parentheses made
        # unbalanced text; a separator was missing before the score clause;
        # "opponent's" was misspelled.
        return f"The player is at position ({player_y}, {player_x}), your opponent is at position ({enemy_y}, {enemy_x}), the ball is at ({ball_y}, {ball_x}), " \
               f"your opponent's score is {enemy_score}, your score is {player_score}."
|
11 |
+
|
12 |
+
|
13 |
+
class GameDescriber:
    """Static, state-independent prompt text for the Atari Pong game."""

    def __init__(self, args):
        # `args` is the experiment argument namespace; only these two fields
        # are read here.
        self.is_only_local_obs = args.is_only_local_obs == 1
        self.max_episode_len = args.max_episode_len
        self.action_desc_dict = {
        }
        self.reward_desc_dict = {
        }

    def describe_goal(self):
        """One-sentence objective of the game."""
        # Bug fix: the goal text was copied from Boxing ("knock out your
        # opponent"); Pong is won by reaching 21 points first, as stated
        # in describe_game().
        return "The goal is to score 21 points before your opponent."

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        # No terminal-state text is provided for this game.
        return ""

    def translate_potential_next_state(self, state, action):
        # No lookahead text is provided for this game.
        return ""

    def describe_game(self):
        """Rules summary shown to the decider."""
        return "In the Pong game, you play the ball with your opponent, each player rallys the ball by moving the paddles on the playfield. " \
               "Paddles move only vertically on the playfield. A player scores one point when the opponent hits the ball out of bounds or misses a hit. " \
               "The first player to score 21 points wins the game."

    def describe_action(self):
        """Enumerate the 6 legal Pong actions for the decider."""
        return "Your Next Move: \n Please choose an action. Type '1' for NOOP (no operation), '2' to hit the ball, " \
               "'3' to move right, '4' to move left, '5' to move right while hit the ball, '6' to move left while hit the ball. Ensure you only provide the action number " \
               "from the valid action list, i.e., [1, 2, 3, 4, 5, 6]."
|
40 |
+
|
41 |
+
|
42 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Translate a sequence of Pong transitions into text descriptions."""

    # Action id -> human-readable description. Replaces the original if/elif
    # chain; fixes "hiting" typos and makes "Hit the ball" consistent with
    # describe_action().
    _ACTION_DESC = {
        1: "Do nothing",
        2: "Hit the ball",
        3: "Move right",
        4: "Move left",
        5: "Move right while hitting the ball",
        6: "Move left while hitting the ball",
    }

    def translate(self, infos, is_current=False):
        """Describe each (state, action, reward, next_state) transition.

        When `is_current` is True, only the latest state is described and a
        single string is returned; otherwise a list of transition strings.
        """
        if is_current:
            return BasicLevelTranslator().translate(infos[-1]['state'])
        descriptions = []
        for info in infos:
            assert 'state' in info, "info should contain state information"
            state_desc = BasicLevelTranslator().translate(info['state'])
            # Unknown ids fall back to action 6's text, matching the original
            # chain's final `else` branch.
            desc = self._ACTION_DESC.get(info['action'], self._ACTION_DESC[6])
            action_desc = f"Take Action: '{desc}'"
            reward_desc = f"Result: Reward of {info['reward']}, "
            next_state_desc = BasicLevelTranslator().translate(info['next_state'])
            descriptions.append(f"{state_desc}.\n {action_desc} \n {reward_desc} \n Transit to {next_state_desc}")
        return descriptions
|
envs/atari/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .represented_atari_game import register_environments
|
envs/atari/represented_atari_game.py
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gym
|
2 |
+
import ale_py
|
3 |
+
import numpy as np
|
4 |
+
from atariari.benchmark.wrapper import AtariARIWrapper
|
5 |
+
from typing import Optional, Union
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
class RepresentedAtariEnv(gym.Wrapper):
    """Gym wrapper exposing AtariARI RAM-derived labels as the observation.

    Instead of pixel frames, observations are the values of the AtariARI
    label dict (semantic RAM fields such as positions and scores), returned
    as a flat numpy vector.
    """

    def __init__(self, env_name, render_mode=None):
        super().__init__(AtariARIWrapper(gym.make(env_name, render_mode=render_mode)))
        self.metadata = self.env.metadata
        self.env_name = env_name
        self.observation = None
        self.info = {}
        self.action_space = self.env.action_space
        # Reset once so env.labels() is populated before the observation
        # space is sized from it.
        _ = self.env.reset()
        obs = self.env.labels()
        obs_dim = len(obs)
        self.obs_label = obs.keys()
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)

    def step(self, action):
        """Step the wrapped env; return the label vector, not the pixels."""
        # The pixel observation is intentionally discarded.
        original_next_obs, reward, env_done, env_truncated, info = self.env.step(action)
        next_obs = self.env.labels()
        self.obs_label = next_obs.keys()
        self.observation = next_obs
        return np.array(list(next_obs.values())), reward, env_done, env_truncated, info

    def reset(self, seed=0):
        """Reset the wrapped env; return (label vector, info).

        NOTE(review): the default seed=0 makes resets deterministic unless a
        caller passes a seed explicitly — confirm this is intended.
        """
        obs_original, info = self.env.reset(seed=seed)
        obs = self.env.labels()
        self.obs_label = obs.keys()
        self.observation = obs
        return np.array(list(obs.values())), info

    def get_info(self):
        # Returns the most recent label dict (not the `info` from step/reset).
        return self.observation

    def render(self, render_mode=None):
        # `render_mode` is accepted for API compatibility but ignored; the
        # underlying env renders with the mode given at construction.
        return self.env.render()
|
42 |
+
|
43 |
+
|
44 |
+
# Thin subclasses that bind RepresentedAtariEnv to a specific NoFrameskip ROM,
# so each game can be registered under its own gym id.
class RepresentedMsPacman(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "MsPacmanNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedBowling(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "BowlingNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedBoxing(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "BoxingNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedBreakout(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "BreakoutNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedDemonAttack(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "DemonAttackNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedFreeway(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "FreewayNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedFrostbite(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "FrostbiteNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedHero(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "HeroNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedMontezumaRevenge(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "MontezumaRevengeNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedPitfall(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "PitfallNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedPong(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "PongNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedPrivateEye(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "PrivateEyeNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedQbert(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "QbertNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedRiverraid(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "RiverraidNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedSeaquest(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "SeaquestNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedSpaceInvaders(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "SpaceInvadersNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedTennis(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "TennisNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedVenture(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "VentureNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)


class RepresentedVideoPinball(RepresentedAtariEnv):
    def __init__(self, render_mode: Optional[str]=None):
        env_name = "VideoPinballNoFrameskip-v4"
        super().__init__(env_name=env_name, render_mode=render_mode)
|
156 |
+
|
157 |
+
|
158 |
+
def env_factory(env_class):
    """Return a deferred constructor for *env_class*.

    gym.register expects a zero-argument-capable callable as the entry
    point, so wrap the class in a closure instead of instantiating it now.
    """
    def _make(render_mode=None):
        return env_class(render_mode=render_mode)

    return _make
|
162 |
+
|
163 |
+
|
164 |
+
def register_environments():
    """Register every Represented* Atari env under a `Represented<Game>-v0` id.

    Safe to call once at import time (envs/atari/__init__.py exposes it and
    envs/__init__.py invokes it); calling it twice would make gym raise on
    the duplicate ids.
    """
    # Gym id -> wrapper class; ids mirror the underlying NoFrameskip-v4 ROMs.
    env_classes = {
        'RepresentedMsPacman-v0': RepresentedMsPacman,
        'RepresentedBowling-v0': RepresentedBowling,
        'RepresentedBoxing-v0': RepresentedBoxing,
        'RepresentedBreakout-v0': RepresentedBreakout,
        'RepresentedDemonAttack-v0': RepresentedDemonAttack,
        'RepresentedFreeway-v0': RepresentedFreeway,
        'RepresentedFrostbite-v0': RepresentedFrostbite,
        'RepresentedHero-v0': RepresentedHero,
        'RepresentedMontezumaRevenge-v0': RepresentedMontezumaRevenge,
        'RepresentedPitfall-v0': RepresentedPitfall,
        'RepresentedPong-v0': RepresentedPong,
        'RepresentedPrivateEye-v0': RepresentedPrivateEye,
        'RepresentedQbert-v0': RepresentedQbert,
        'RepresentedRiverraid-v0': RepresentedRiverraid,
        'RepresentedSeaquest-v0': RepresentedSeaquest,
        'RepresentedSpaceInvaders-v0': RepresentedSpaceInvaders,
        'RepresentedTennis-v0': RepresentedTennis,
        'RepresentedVenture-v0': RepresentedVenture,
        'RepresentedVideoPinball-v0': RepresentedVideoPinball
    }

    for env_name, env_class in env_classes.items():
        # env_factory defers construction until gym.make() is called.
        gym.register(
            id=env_name,
            entry_point=env_factory(env_class),
        )
|
192 |
+
|
193 |
+
|
194 |
+
# register_environments()
|
195 |
+
# env_classes = {
|
196 |
+
# 'RepresentedMsPacman-v0': RepresentedMsPacman,
|
197 |
+
# 'RepresentedBowling-v0': RepresentedBowling,
|
198 |
+
# 'RepresentedBoxing-v0': RepresentedBoxing,
|
199 |
+
# 'RepresentedBreakout-v0': RepresentedBreakout,
|
200 |
+
# 'RepresentedDemonAttack-v0': RepresentedDemonAttack,
|
201 |
+
# 'RepresentedFreeway-v0': RepresentedFreeway,
|
202 |
+
# 'RepresentedFrostbite-v0': RepresentedFrostbite,
|
203 |
+
# 'RepresentedHero-v0': RepresentedHero,
|
204 |
+
# 'RepresentedMontezumaRevenge-v0': RepresentedMontezumaRevenge,
|
205 |
+
# 'RepresentedPitfall-v0': RepresentedPitfall,
|
206 |
+
# 'RepresentedPong-v0': RepresentedPong,
|
207 |
+
# 'RepresentedPrivateEye-v0': RepresentedPrivateEye,
|
208 |
+
# 'RepresentedQbert-v0': RepresentedQbert,
|
209 |
+
# 'RepresentedRiverraid-v0': RepresentedRiverraid,
|
210 |
+
# 'RepresentedSeaquest-v0': RepresentedSeaquest,
|
211 |
+
# 'RepresentedSpaceInvaders-v0': RepresentedSpaceInvaders,
|
212 |
+
# 'RepresentedTennis-v0': RepresentedTennis,
|
213 |
+
# 'RepresentedVenture-v0': RepresentedVenture,
|
214 |
+
# 'RepresentedVideoPinball-v0': RepresentedVideoPinball
|
215 |
+
# }
|
216 |
+
#
|
217 |
+
# for env, env_class in env_classes.items():
|
218 |
+
# env_1 = env_class()
|
219 |
+
# env_name = env_1.env_name
|
220 |
+
# env_2 = gym.make(env_name)
|
221 |
+
# print(env_name, env_1.action_space == env_2.action_space, env_1.action_space)
|
record_reflexion.csv
CHANGED
@@ -8,4 +8,6 @@ Taxi-v3,1,expert,200.0
|
|
8 |
CliffWalking-v0,1,expert,200.0
|
9 |
FrozenLake-v1,1,expert,200.0
|
10 |
MountainCarContinuous-v0,1,expert,200.0
|
|
|
|
|
11 |
|
|
|
8 |
CliffWalking-v0,1,expert,200.0
|
9 |
FrozenLake-v1,1,expert,200.0
|
10 |
MountainCarContinuous-v0,1,expert,200.0
|
11 |
+
RepresentedBoxing-v0,1,expert,200.0
|
12 |
+
RepresentedPong-v0,1,expert,200.0
|
13 |
|
test_atari.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python main_reflexion.py --env_name RepresentedBoxing-v0 --init_summarizer RepresentedBoxing_init_translator --curr_summarizer RepresentedBoxing_basic_translator --decider naive_actor --prompt_level 1 --num_trails 1 --seed 0
|