Spaces:
Runtime error
Runtime error
Commit
·
6299d2b
1
Parent(s):
91977f9
add: Pusher env
Browse files- envs/__init__.py +9 -3
- envs/mujoco/ant_translator.py +1 -1
- envs/mujoco/pusher_policies.py +15 -0
- envs/mujoco/pusher_translator.py +93 -0
- record_reflexion.csv +1 -0
- shell/test_mujoco_hopper.sh +12 -0
- shell/test_mujoco_invertedPendulum.sh +4 -4
- shell/test_mujoco_pusher.sh +27 -0
envs/__init__.py
CHANGED
@@ -108,8 +108,9 @@ from .mujoco import reacher_translator, reacher_policies
|
|
108 |
from .mujoco import hopper_translator, hopper_policies
|
109 |
from .mujoco import walker2d_translator, walker2d_policies
|
110 |
|
111 |
-
|
112 |
-
|
|
|
113 |
|
114 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
115 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
@@ -135,12 +136,17 @@ REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTr
|
|
135 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
136 |
|
137 |
|
138 |
-
|
139 |
REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
|
140 |
REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
|
141 |
REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
|
142 |
|
|
|
|
|
|
|
|
|
143 |
|
|
|
144 |
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
145 |
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
146 |
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
|
|
108 |
from .mujoco import hopper_translator, hopper_policies
|
109 |
from .mujoco import walker2d_translator, walker2d_policies
|
110 |
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
|
115 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
116 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
|
|
136 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
137 |
|
138 |
|
139 |
+
from .mujoco import halfcheetah_translator, halfcheetah_policies
|
140 |
REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
|
141 |
REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
|
142 |
REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
|
143 |
|
144 |
+
from .mujoco import pusher_translator, pusher_policies
|
145 |
+
REGISTRY["pusher_init_translator"] = pusher_translator.GameDescriber
|
146 |
+
REGISTRY["pusher_basic_translator"] = pusher_translator.BasicStateSequenceTranslator
|
147 |
+
REGISTRY["pusher_policies"] = [pusher_policies.pseudo_random_policy, pusher_policies.real_random_policy]
|
148 |
|
149 |
+
from .mujoco import ant_translator, ant_policies
|
150 |
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
151 |
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
152 |
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
envs/mujoco/ant_translator.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
'''
|
2 |
Action Space Box(-1.0, 1.0, (8,), float32)
|
3 |
Observation Space Box(-inf, inf, (27,), float64)
|
4 |
'''
|
|
|
1 |
+
'''Ant
|
2 |
Action Space Box(-1.0, 1.0, (8,), float32)
|
3 |
Observation Space Box(-inf, inf, (27,), float64)
|
4 |
'''
|
envs/mujoco/pusher_policies.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
|
4 |
+
def pseudo_random_policy(state, pre_action):
    """Sample a random 7-dimensional Pusher action.

    Args:
        state: Current observation. Not used by this policy.
        pre_action: Previously taken action. Not used by this policy.

    Returns:
        A list of 7 floats, each computed as ``4 * random.random() - 2``,
        i.e. drawn from the half-open interval [-2, 2).
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attached at definition time so the description can be read before the
# policy has ever been called (previously it only existed after the first
# call).
pseudo_random_policy.description = "Select action randomly"
|
9 |
+
|
10 |
+
|
11 |
+
def real_random_policy(state, pre_action=1):
    """Sample a random 7-dimensional Pusher action.

    Args:
        state: Current observation. Not used by this policy.
        pre_action: Previously taken action (defaults to 1). Not used by
            this policy.

    Returns:
        A list of 7 floats, each computed as ``4 * random.random() - 2``,
        i.e. drawn from the half-open interval [-2, 2).
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attached at definition time so the description can be read before the
# policy has ever been called (previously it only existed after the first
# call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/pusher_translator.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''Pusher
|
2 |
+
Action Space Box(-2.0, 2.0, (7,), float32)
|
3 |
+
Observation Space Box(-inf, inf, (23,), float64)
|
4 |
+
'''
|
5 |
+
import math
|
6 |
+
|
7 |
+
class BasicLevelTranslator:
    """Render a single Pusher observation as one English sentence."""

    def __init__(self):
        pass

    def translate(self, state):
        """Describe one observation vector in text.

        The vector is read as: indices 0-6 joint angles, 7-13 joint
        velocities, 14-16 fingertip position, 17-19 object position and
        20 onwards goal position (only its first three values are used).
        Angles and velocities are passed through ``math.degrees`` before
        being printed with two decimals.

        Args:
            state: Indexable, sliceable sequence of numbers.

        Returns:
            A comma-separated description terminated by a period.
        """
        joint_names = (
            "panning shoulder",
            "shoulder lifting joint",
            "shoulder rolling joint",
            "elbow joint",
            "forearm rolling joint",
            "wrist flexing joint",
            "wrist rolling joint",
        )
        angles_deg = [math.degrees(value) for value in state[:7]]
        velocities_deg = [math.degrees(value) for value in state[7:14]]

        def xyz(label, start, stop=None):
            # Explicit indexing (rather than unpacking) so that a slice
            # longer than three values is accepted, as for the goal slice.
            point = state[start:stop]
            return (f"{label} coordinates (x, y, z): "
                    f"({point[0]:.2f}, {point[1]:.2f}, {point[2]:.2f})")

        pieces = []
        for idx, joint in enumerate(joint_names):
            pieces.append(f"Rotation of the {joint}: {angles_deg[idx]:.2f} degrees")
        for idx, joint in enumerate(joint_names):
            pieces.append(
                f"Rotational velocity of the {joint}: {velocities_deg[idx]:.2f} degrees/s"
            )
        pieces.append(xyz("Fingertip", 14, 17))
        pieces.append(xyz("Object", 17, 20))
        pieces.append(xyz("Goal", 20))
        return ", ".join(pieces) + "."
|
40 |
+
|
41 |
+
|
42 |
+
class GameDescriber:
    """Provide the fixed natural-language descriptions of the Pusher task."""

    def __init__(self, args):
        """Copy the settings this describer reads from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against
                1 to obtain a boolean) and ``max_episode_len``.
        """
        local_only = args.is_only_local_obs == 1
        self.is_only_local_obs = local_only
        self.max_episode_len = args.max_episode_len
        # Both lookup tables are intentionally left empty here.
        self.action_desc_dict = {}
        self.reward_desc_dict = {}

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence statement of the task objective."""
        goal_text = (
            "The goal is to move the target cylinder (object) to the goal "
            "position using the robot's end effector (fingertip)."
        )
        return goal_text

    def describe_game(self):
        """Return the overview of the game, its observations and reward."""
        sentences = [
            "In the Pusher game, you control a multi-jointed robot arm to "
            "manipulate a target cylinder (object) and place it in a goal "
            "position using the robot's fingertip (end effector).",
            "The robot has shoulder, elbow, forearm, and wrist joints that "
            "you can control with torque values.",
            "The observation space includes joint angles, angular velocities "
            "of joints, fingertip coordinates, object coordinates, and goal "
            "coordinates.",
            "The reward is based on the distance between the fingertip and "
            "the object, the distance between the object and the goal, and "
            "control penalties for large actions.",
        ]
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for the next 7-value torque action."""
        header = "Your next move: \n "
        request = (
            "Please provide a list of 7 numerical values within the range "
            "[-2, 2], representing the torques applied to the robot's joints "
            "(shoulder, elbow, forearm, and wrist)."
        )
        return header + request
|
71 |
+
|
72 |
+
|
73 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe Pusher transitions (state, action, reward, next state)."""

    def translate(self, infos, is_current=False):
        """Translate recorded transitions into text.

        Args:
            infos: Sequence of dicts. Each dict is read for the keys
                ``'state'``, ``'action'`` (indexed 0-6), ``'reward'`` and
                ``'next_state'``; with ``is_current`` only the last
                entry's ``'state'`` is read.
            is_current: When true, describe only the state of the last
                entry of ``infos``.

        Returns:
            A single string when ``is_current`` is true, otherwise a list
            with one multi-line description per entry of ``infos``.

        Raises:
            AssertionError: If an entry lacks the ``'state'`` key (only
                checked when ``is_current`` is false).
        """
        # Reuse the inherited single-state translation instead of building
        # a fresh BasicLevelTranslator for every state that is described.
        describe_state = super().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        torque_labels = (
            "Shoulder Pan",
            "Shoulder Lift",
            "Shoulder Roll",
            "Elbow Flex",
            "Forearm Roll",
            "Wrist Flex",
            "Wrist Roll",
        )
        descriptions = []
        for info in infos:
            # Raised explicitly (same type and message as the former
            # assert) so the check is not stripped when running with -O.
            if 'state' not in info:
                raise AssertionError("info should contain state information")

            state_desc = describe_state(info['state'])
            action = info['action']
            torques = ", ".join(
                f"{label}: {action[idx]:.2f}"
                for idx, label in enumerate(torque_labels)
            )
            action_desc = f"Take Action: Apply Torques - {torques}"
            reward_desc = f"Result: Reward of {info['reward']:.2f}"
            next_state_desc = describe_state(info['next_state'])
            descriptions.append(
                f"{state_desc}\n{action_desc}\n{reward_desc}\nTransit to {next_state_desc}"
            )
        return descriptions
|
93 |
+
|
record_reflexion.csv
CHANGED
@@ -16,4 +16,5 @@ Hopper-v4,1,expert,3542.2
|
|
16 |
Walker2d-v4,1,expert,5000.0
|
17 |
Swimmer-v4,1,expert,44.4
|
18 |
Reacher-v4,1,expert,-2.6
|
|
|
19 |
|
|
|
16 |
Walker2d-v4,1,expert,5000.0
|
17 |
Swimmer-v4,1,expert,44.4
|
18 |
Reacher-v4,1,expert,-2.6
|
19 |
+
Pusher-v4,1,expert,-52.3
|
20 |
|
shell/test_mujoco_hopper.sh
CHANGED
@@ -1,5 +1,17 @@
|
|
1 |
# Hopper-v4
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# REFLEXION
|
4 |
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
5 |
|
|
|
1 |
# Hopper-v4
|
2 |
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
# REFLEXION
|
16 |
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
17 |
|
shell/test_mujoco_invertedPendulum.sh
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
# InvertedPendulum-v4
|
2 |
|
3 |
# COT
|
4 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
5 |
|
6 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
7 |
|
8 |
# SPP
|
9 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
10 |
|
11 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
12 |
|
13 |
|
14 |
|
|
|
1 |
# InvertedPendulum-v4
|
2 |
|
3 |
# COT
|
4 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
|
6 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
|
8 |
# SPP
|
9 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
|
11 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
|
13 |
|
14 |
|
shell/test_mujoco_pusher.sh
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Pusher-v4
|
2 |
+
|
3 |
+
# COT
|
4 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
5 |
+
|
6 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
7 |
+
|
8 |
+
# SPP
|
9 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
10 |
+
|
11 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
12 |
+
|
13 |
+
|
14 |
+
# REFLEXION
|
15 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
16 |
+
|
17 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
18 |
+
|
19 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
20 |
+
|
21 |
+
|
22 |
+
# exe
|
23 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
24 |
+
|
25 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
26 |
+
|
27 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|