Spaces:
Running
Running
import numpy as np | |
from gym import utils | |
from gym.envs.mujoco import MujocoEnv | |
from gym.spaces import Box | |
class HumanoidStandupEnv(MujocoEnv, utils.EzPickle): | |
""" | |
### Description | |
This environment is based on the environment introduced by Tassa, Erez and Todorov | |
in ["Synthesis and stabilization of complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025). | |
The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a | |
pair of legs and arms. The legs each consist of two links, and so the arms (representing the | |
knees and elbows respectively). The environment starts with the humanoid laying on the ground, | |
and then the goal of the environment is to make the humanoid standup and then keep it standing | |
by applying torques on the various hinges. | |
### Action Space | |
The agent take a 17-element vector for actions. | |
The action space is a continuous `(action, ...)` all in `[-1, 1]`, where `action` | |
represents the numerical torques applied at the hinge joints. | |
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | | |
| --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | | |
| 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | hip_1 (front_left_leg) | hinge | torque (N m) | | |
| 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | angle_1 (front_left_leg) | hinge | torque (N m) | | |
| 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | hip_2 (front_right_leg) | hinge | torque (N m) | | |
| 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) | | |
| 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) | | |
| 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) | | |
| 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) | | |
| 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) | | |
| 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) | | |
| 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) | | |
| 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) | | |
| 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) | | |
| 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) | | |
| 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) | | |
| 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) | | |
| 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) | | |
| 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) | | |
### Observation Space | |
The state space consists of positional values of different body parts of the Humanoid, | |
followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. | |
**Note:** The x- and y-coordinates of the torso are being omitted to produce position-agnostic behavior in policies | |
The observation is a `ndarray` with shape `(376,)` where the elements correspond to the following: | |
| Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | |
| --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- | | |
| 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) | | |
| 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | |
| 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | |
| 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | |
| 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | |
| 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) | | |
| 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) | | |
| 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) | | |
| 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) | | |
| 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) | | |
| 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) | | |
| 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) | | |
| 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) | | |
| 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) | | |
| 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) | | |
| 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) | | |
| 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) | | |
| 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) | | |
| 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) | | |
| 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) | | |
| 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) | | |
| 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) | | |
| 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | |
| 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | |
| 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | |
| 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | |
| 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | |
| 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | |
| 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) | | |
| 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) | | |
| 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) | | |
| 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) | | |
| 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) | | |
| 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) | | |
| 35 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) | | |
| 36 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) | | |
| 37 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) | | |
| 38 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) | | |
| 39 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) | | |
| 40 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) | | |
| 41 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) | | |
| 42 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) | | |
| 43 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) | | |
| 44 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) | | |
| 45 | angular velocitty of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) | | |
Additionally, after all the positional and velocity based values in the table, | |
the state_space consists of (in order): | |
- *cinert:* Mass and inertia of a single rigid body relative to the center of mass | |
(this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*) | |
and hence adds to another 140 elements in the state space. | |
- *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence | |
adds another 84 elements in the state space | |
- *qfrc_actuator:* Constraint force generated as the actuator force. This has shape | |
`(23,)` *(nv * 1)* and hence adds another 23 elements to the state space. | |
- *cfrc_ext:* This is the center of mass based external force on the body. It has shape | |
14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space. | |
where *nbody* stands for the number of bodies in the robot and *nv* stands for the number | |
of degrees of freedom (*= dim(qvel)*) | |
The (x,y,z) coordinates are translational DOFs while the orientations are rotational | |
DOFs expressed as quaternions. One can read more about free joints on the | |
[Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html). | |
**Note:** HumanoidStandup-v4 environment no longer has the following contact forces issue. | |
If using previous HumanoidStandup versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results | |
in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0 | |
when using the Humanoid environment if you would like to report results with contact forces | |
(if contact forces are not used in your experiments, you can use version > 2.0). | |
### Rewards | |
The reward consists of three parts: | |
- *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a relative | |
reward which measures how much upward it has moved from the last timestep, but it is an | |
absolute reward which measures how much upward the Humanoid has moved overall. It is | |
measured as *(z coordinate after action - 0)/(atomic timestep)*, where *z coordinate after | |
action* is index 0 in the state/index 2 in the table, and *atomic timestep* is the time for | |
one frame of movement even though the simulation has a framerate of 5 (done in order to inflate | |
rewards a little for faster learning). | |
- *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large of | |
a control force. If there are *nu* actuators/controls, then the control has shape `nu x 1`. | |
It is measured as *0.1 **x** sum(control<sup>2</sup>)*. | |
- *quad_impact_cost*: A negative reward for penalising the humanoid if the external | |
contact force is too large. It is calculated as *min(0.5 * 0.000001 * sum(external | |
contact force<sup>2</sup>), 10)*. | |
The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost* | |
### Starting State | |
All observations start in state | |
(0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of | |
[-0.01, 0.01] added to the positional and velocity values (values in the table) | |
for stochasticity. Note that the initial z coordinate is intentionally selected | |
to be low, thereby indicating a laying down humanoid. The initial orientation is | |
designed to make it face forward as well. | |
### Episode End | |
The episode ends when any of the following happens: | |
1. Truncation: The episode duration reaches a 1000 timesteps | |
2. Termination: Any of the state space values is no longer finite | |
### Arguments | |
No additional arguments are currently supported. | |
``` | |
env = gym.make('HumanoidStandup-v4') | |
``` | |
There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and | |
beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. | |
### Version History | |
* v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 | |
* v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen) | |
* v2: All continuous control environments now use mujoco_py >= 1.50 | |
* v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. | |
* v0: Initial versions release (1.0.0) | |
""" | |
metadata = { | |
"render_modes": [ | |
"human", | |
"rgb_array", | |
"depth_array", | |
], | |
"render_fps": 67, | |
} | |
def __init__(self, **kwargs): | |
observation_space = Box( | |
low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 | |
) | |
MujocoEnv.__init__( | |
self, | |
"humanoidstandup.xml", | |
5, | |
observation_space=observation_space, | |
**kwargs | |
) | |
utils.EzPickle.__init__(self, **kwargs) | |
def _get_obs(self): | |
data = self.data | |
return np.concatenate( | |
[ | |
data.qpos.flat[2:], | |
data.qvel.flat, | |
data.cinert.flat, | |
data.cvel.flat, | |
data.qfrc_actuator.flat, | |
data.cfrc_ext.flat, | |
] | |
) | |
def step(self, a): | |
self.do_simulation(a, self.frame_skip) | |
pos_after = self.data.qpos[2] | |
data = self.data | |
uph_cost = (pos_after - 0) / self.model.opt.timestep | |
quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() | |
quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() | |
quad_impact_cost = min(quad_impact_cost, 10) | |
reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 | |
if self.render_mode == "human": | |
self.render() | |
return ( | |
self._get_obs(), | |
reward, | |
False, | |
False, | |
dict( | |
reward_linup=uph_cost, | |
reward_quadctrl=-quad_ctrl_cost, | |
reward_impact=-quad_impact_cost, | |
), | |
) | |
def reset_model(self): | |
c = 0.01 | |
self.set_state( | |
self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), | |
self.init_qvel | |
+ self.np_random.uniform( | |
low=-c, | |
high=c, | |
size=self.model.nv, | |
), | |
) | |
return self._get_obs() | |
def viewer_setup(self): | |
assert self.viewer is not None | |
self.viewer.cam.trackbodyid = 1 | |
self.viewer.cam.distance = self.model.stat.extent * 1.0 | |
self.viewer.cam.lookat[2] = 0.8925 | |
self.viewer.cam.elevation = -20 | |