CharlesZhang committed on
Commit
6299d2b
·
1 Parent(s): 91977f9

add: Pusher env

Browse files
envs/__init__.py CHANGED
@@ -108,8 +108,9 @@ from .mujoco import reacher_translator, reacher_policies
108
  from .mujoco import hopper_translator, hopper_policies
109
  from .mujoco import walker2d_translator, walker2d_policies
110
 
111
- from .mujoco import halfcheetah_translator, halfcheetah_policies
112
- from .mujoco import ant_translator, ant_policies
 
113
 
114
  REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
115
  REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
@@ -135,12 +136,17 @@ REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTr
135
  REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
136
 
137
 
138
-
139
  REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
140
  REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
141
  REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
142
 
 
 
 
 
143
 
 
144
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
145
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
146
  REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
 
108
  from .mujoco import hopper_translator, hopper_policies
109
  from .mujoco import walker2d_translator, walker2d_policies
110
 
111
+
112
+
113
+
114
 
115
  REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
116
  REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
 
136
  REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
137
 
138
 
139
+ from .mujoco import halfcheetah_translator, halfcheetah_policies
140
  REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
141
  REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
142
  REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
143
 
144
+ from .mujoco import pusher_translator, pusher_policies
145
+ REGISTRY["pusher_init_translator"] = pusher_translator.GameDescriber
146
+ REGISTRY["pusher_basic_translator"] = pusher_translator.BasicStateSequenceTranslator
147
+ REGISTRY["pusher_policies"] = [pusher_policies.pseudo_random_policy, pusher_policies.real_random_policy]
148
 
149
+ from .mujoco import ant_translator, ant_policies
150
  REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
151
  REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
152
  REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
envs/mujoco/ant_translator.py CHANGED
@@ -1,4 +1,4 @@
1
- '''
2
  Action Space Box(-1.0, 1.0, (8,), float32)
3
  Observation Space Box(-inf, inf, (27,), float64)
4
  '''
 
1
+ '''Ant
2
  Action Space Box(-1.0, 1.0, (8,), float32)
3
  Observation Space Box(-inf, inf, (27,), float64)
4
  '''
envs/mujoco/pusher_policies.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+
4
def pseudo_random_policy(state, pre_action):
    """Sample a random 7-dimensional Pusher action.

    Both arguments are accepted so the function matches the policy call
    signature used by the registry, but neither is read.

    Args:
        state: Current observation (ignored).
        pre_action: Previously taken action (ignored).

    Returns:
        list[float]: Seven independent samples of ``4 * random.random() - 2``,
        i.e. values in the half-open interval [-2, 2).
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attach the human-readable description at definition time. Previously it was
# assigned inside the function body, so ``pseudo_random_policy.description``
# raised AttributeError until the policy had been called at least once.
pseudo_random_policy.description = "Select action randomly"
9
+
10
+
11
def real_random_policy(state, pre_action=1):
    """Sample a random 7-dimensional Pusher action.

    Both arguments are accepted so the function matches the policy call
    signature used by the registry, but neither is read.

    Args:
        state: Current observation (ignored).
        pre_action: Previously taken action (ignored). Defaults to 1.

    Returns:
        list[float]: Seven independent samples of ``4 * random.random() - 2``,
        i.e. values in the half-open interval [-2, 2).
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attach the human-readable description at definition time. Previously it was
# assigned inside the function body, so ``real_random_policy.description``
# raised AttributeError until the policy had been called at least once.
real_random_policy.description = "Select action with a random policy"
envs/mujoco/pusher_translator.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''Pusher
2
+ Action Space Box(-2.0, 2.0, (7,), float32)
3
+ Observation Space Box(-inf, inf, (23,), float64)
4
+ '''
5
+ import math
6
+
7
class BasicLevelTranslator:
    """Render a single Pusher observation as one English sentence.

    The observation is sliced positionally: entries 0-6 are reported as joint
    rotations, 7-13 as joint rotational velocities, 14-16 as the fingertip
    coordinates, 17-19 as the object coordinates and 20 onwards as the goal
    coordinates (only the first three goal entries are printed).
    """

    # Joint labels in observation order. The same labels are used for both the
    # rotation and the rotational-velocity phrases, so they live in one table
    # instead of being repeated in fourteen separate format strings.
    _JOINT_NAMES = (
        "panning shoulder",
        "shoulder lifting joint",
        "shoulder rolling joint",
        "elbow joint",
        "forearm rolling joint",
        "wrist flexing joint",
        "wrist rolling joint",
    )

    # (label, slice) pairs for the three 3-D points at the tail of the state.
    _POINT_SLICES = (
        ("Fingertip", slice(14, 17)),
        ("Object", slice(17, 20)),
        ("Goal", slice(20, None)),
    )

    def translate(self, state):
        """Describe ``state`` in natural language.

        Args:
            state: Indexable, sliceable sequence of at least 23 numbers laid
                out as described on the class. The first 14 entries are passed
                through ``math.degrees`` before being printed.

        Returns:
            str: Comma-separated phrases terminated by a full stop; every
            number is formatted with two decimals.

        Raises:
            IndexError: If ``state`` has fewer than 23 entries.
        """
        joint_count = len(self._JOINT_NAMES)
        angles = [math.degrees(value) for value in state[:joint_count]]
        velocities = [
            math.degrees(value) for value in state[joint_count:2 * joint_count]
        ]

        phrases = [
            f"Rotation of the {name}: {angle:.2f} degrees"
            for name, angle in zip(self._JOINT_NAMES, angles)
        ]
        phrases.extend(
            f"Rotational velocity of the {name}: {velocity:.2f} degrees/s"
            for name, velocity in zip(self._JOINT_NAMES, velocities)
        )

        for label, span in self._POINT_SLICES:
            point = state[span]
            # Explicit indexing (rather than unpacking) keeps the IndexError
            # on a too-short state and tolerates extra trailing entries.
            phrases.append(
                f"{label} coordinates (x, y, z): "
                f"({point[0]:.2f}, {point[1]:.2f}, {point[2]:.2f})"
            )

        return ", ".join(phrases) + "."
40
+
41
+
42
class GameDescriber:
    """Provide the fixed natural-language descriptions of the Pusher task."""

    _GOAL_TEXT = (
        "The goal is to move the target cylinder (object) to the goal position using the robot's end effector (fingertip)."
    )

    _GAME_TEXT = (
        "In the Pusher game, you control a multi-jointed robot arm to manipulate a target cylinder (object) "
        "and place it in a goal position using the robot's fingertip (end effector). The robot has shoulder, elbow, "
        "forearm, and wrist joints that you can control with torque values. The observation space includes joint angles, "
        "angular velocities of joints, fingertip coordinates, object coordinates, and goal coordinates. The reward is "
        "based on the distance between the fingertip and the object, the distance between the object and the goal, "
        "and control penalties for large actions."
    )

    _ACTION_TEXT = (
        "Your next move: \n Please provide a list of 7 numerical values within the range [-2, 2], "
        "representing the torques applied to the robot's joints (shoulder, elbow, forearm, and wrist)."
    )

    def __init__(self, args):
        """Store the settings read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against 1)
                and ``max_episode_len``.
        """
        self.max_episode_len = args.max_episode_len
        self.is_only_local_obs = args.is_only_local_obs == 1
        # Both lookup tables are intentionally empty for this environment.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence statement of the task objective."""
        return self._GOAL_TEXT

    def describe_game(self):
        """Return the paragraph that introduces the game, observations and reward."""
        return self._GAME_TEXT

    def describe_action(self):
        """Return the prompt that asks for a 7-value torque list."""
        return self._ACTION_TEXT
71
+
72
+
73
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a sequence of Pusher transitions in natural language."""

    # One placeholder per torque, in the order the action vector is indexed.
    _ACTION_TEMPLATE = (
        "Take Action: Apply Torques - "
        "Shoulder Pan: {:.2f}, Shoulder Lift: {:.2f}, Shoulder Roll: {:.2f}, "
        "Elbow Flex: {:.2f}, Forearm Roll: {:.2f}, Wrist Flex: {:.2f}, Wrist Roll: {:.2f}"
    )

    def translate(self, infos, is_current=False):
        """Translate transition records into text.

        Args:
            infos: Sequence of dicts. Each dict is read through the keys
                ``'state'``, ``'action'`` (indexed 0-6), ``'reward'`` and
                ``'next_state'``.
            is_current: When true, only the ``'state'`` of the last record is
                described and a single string is returned.

        Returns:
            str | list[str]: The description of the latest state when
            ``is_current`` is true; otherwise one multi-line description per
            record (state, action, reward, next state).

        Raises:
            AssertionError: If a record has no ``'state'`` key.
            IndexError: If ``is_current`` is true and ``infos`` is empty, or
                an action has fewer than seven entries.
        """
        # Reuse the inherited single-state translator instead of building a
        # fresh BasicLevelTranslator() for every state that is described.
        describe_state = super().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        descriptions = []
        for info in infos:
            # Raised explicitly (same exception type as the former ``assert``)
            # so the check is not stripped when Python runs with -O.
            if 'state' not in info:
                raise AssertionError("info should contain state information")

            state_desc = describe_state(info['state'])
            action = info['action']
            action_desc = self._ACTION_TEMPLATE.format(
                *(action[index] for index in range(7))
            )
            reward_desc = f"Result: Reward of {info['reward']:.2f}"
            next_state_desc = describe_state(info['next_state'])
            descriptions.append(
                f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to {next_state_desc}"
            )
        return descriptions
93
+
record_reflexion.csv CHANGED
@@ -16,4 +16,5 @@ Hopper-v4,1,expert,3542.2
16
  Walker2d-v4,1,expert,5000.0
17
  Swimmer-v4,1,expert,44.4
18
  Reacher-v4,1,expert,-2.6
 
19
 
 
16
  Walker2d-v4,1,expert,5000.0
17
  Swimmer-v4,1,expert,44.4
18
  Reacher-v4,1,expert,-2.6
19
+ Pusher-v4,1,expert,-52.3
20
 
shell/test_mujoco_hopper.sh CHANGED
@@ -1,5 +1,17 @@
1
  # Hopper-v4
2
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  # REFLEXION
4
  python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
5
 
 
1
  # Hopper-v4
2
 
3
+ # COT
4
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+
15
  # REFLEXION
16
  python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
17
 
shell/test_mujoco_invertedPendulum.sh CHANGED
@@ -1,14 +1,14 @@
1
  # InvertedPendulum-v4
2
 
3
  # COT
4
- python main_reflexion.py --env_name InvertedPendulum-v1 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
 
6
- python main_reflexion.py --env_name InvertedPendulum-v1 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
 
8
  # SPP
9
- python main_reflexion.py --env_name InvertedPendulum-v1 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
 
11
- python main_reflexion.py --env_name InvertedPendulum-v1 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
 
13
 
14
 
 
1
  # InvertedPendulum-v4
2
 
3
  # COT
4
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
 
6
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
 
8
  # SPP
9
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
 
11
+ python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
 
13
 
14
 
shell/test_mujoco_pusher.sh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pusher-v4
2
+
3
+ # COT
4
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
5
+
6
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
7
+
8
+ # SPP
9
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
10
+
11
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
12
+
13
+
14
+ # REFLEXION
15
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
16
+
17
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
18
+
19
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
20
+
21
+
22
+ # exe
23
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
24
+
25
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
26
+
27
+ python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator