import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) MuJocoPyEnv.__init__( self, "inverted_double_pendulum.xml", 5, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, action): self.do_simulation(action, self.frame_skip) ob = self._get_obs() x, _, y = self.sim.data.site_xpos[0] dist_penalty = 0.01 * x**2 + (y - 2) ** 2 v1, v2 = self.sim.data.qvel[1:3] vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 alive_bonus = 10 r = alive_bonus - dist_penalty - vel_penalty terminated = bool(y <= 1) if self.render_mode == "human": self.render() return ob, r, terminated, False, {} def _get_obs(self): return np.concatenate( [ self.sim.data.qpos[:1], # cart x pos np.sin(self.sim.data.qpos[1:]), # link angles np.cos(self.sim.data.qpos[1:]), np.clip(self.sim.data.qvel, -10, 10), np.clip(self.sim.data.qfrc_constraint, -10, 10), ] ).ravel() def reset_model(self): self.set_state( self.init_qpos + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq), self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1, ) return self._get_obs() def viewer_setup(self): assert self.viewer is not None v = self.viewer v.cam.trackbodyid = 0 v.cam.distance = self.model.stat.extent * 0.5 v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]