Spaces:
Running
Running
__credits__ = ["Andrea PIERRÉ"] | |
import math | |
from typing import TYPE_CHECKING, List, Optional | |
import numpy as np | |
import gym | |
from gym import error, spaces | |
from gym.error import DependencyNotInstalled | |
from gym.utils import EzPickle | |
try: | |
import Box2D | |
from Box2D.b2 import ( | |
circleShape, | |
contactListener, | |
edgeShape, | |
fixtureDef, | |
polygonShape, | |
revoluteJointDef, | |
) | |
except ImportError: | |
raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`") | |
if TYPE_CHECKING: | |
import pygame | |
FPS = 50 | |
SCALE = 30.0 # affects how fast-paced the game is, forces should be adjusted as well | |
MOTORS_TORQUE = 80 | |
SPEED_HIP = 4 | |
SPEED_KNEE = 6 | |
LIDAR_RANGE = 160 / SCALE | |
INITIAL_RANDOM = 5 | |
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)] | |
LEG_DOWN = -8 / SCALE | |
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE | |
VIEWPORT_W = 600 | |
VIEWPORT_H = 400 | |
TERRAIN_STEP = 14 / SCALE | |
TERRAIN_LENGTH = 200 # in steps | |
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4 | |
TERRAIN_GRASS = 10 # low long are grass spots, in steps | |
TERRAIN_STARTPAD = 20 # in steps | |
FRICTION = 2.5 | |
HULL_FD = fixtureDef( | |
shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]), | |
density=5.0, | |
friction=0.1, | |
categoryBits=0x0020, | |
maskBits=0x001, # collide only with ground | |
restitution=0.0, | |
) # 0.99 bouncy | |
LEG_FD = fixtureDef( | |
shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)), | |
density=1.0, | |
restitution=0.0, | |
categoryBits=0x0020, | |
maskBits=0x001, | |
) | |
LOWER_FD = fixtureDef( | |
shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)), | |
density=1.0, | |
restitution=0.0, | |
categoryBits=0x0020, | |
maskBits=0x001, | |
) | |
class ContactDetector(contactListener): | |
def __init__(self, env): | |
contactListener.__init__(self) | |
self.env = env | |
def BeginContact(self, contact): | |
if ( | |
self.env.hull == contact.fixtureA.body | |
or self.env.hull == contact.fixtureB.body | |
): | |
self.env.game_over = True | |
for leg in [self.env.legs[1], self.env.legs[3]]: | |
if leg in [contact.fixtureA.body, contact.fixtureB.body]: | |
leg.ground_contact = True | |
def EndContact(self, contact): | |
for leg in [self.env.legs[1], self.env.legs[3]]: | |
if leg in [contact.fixtureA.body, contact.fixtureB.body]: | |
leg.ground_contact = False | |
class BipedalWalker(gym.Env, EzPickle): | |
""" | |
### Description | |
This is a simple 4-joint walker robot environment. | |
There are two versions: | |
- Normal, with slightly uneven terrain. | |
- Hardcore, with ladders, stumps, pitfalls. | |
To solve the normal version, you need to get 300 points in 1600 time steps. | |
To solve the hardcore version, you need 300 points in 2000 time steps. | |
A heuristic is provided for testing. It's also useful to get demonstrations | |
to learn from. To run the heuristic: | |
``` | |
python gym/envs/box2d/bipedal_walker.py | |
``` | |
### Action Space | |
Actions are motor speed values in the [-1, 1] range for each of the | |
4 joints at both hips and knees. | |
### Observation Space | |
State consists of hull angle speed, angular velocity, horizontal speed, | |
vertical speed, position of joints and joints angular speed, legs contact | |
with ground, and 10 lidar rangefinder measurements. There are no coordinates | |
in the state vector. | |
### Rewards | |
Reward is given for moving forward, totaling 300+ points up to the far end. | |
If the robot falls, it gets -100. Applying motor torque costs a small | |
amount of points. A more optimal agent will get a better score. | |
### Starting State | |
The walker starts standing at the left end of the terrain with the hull | |
horizontal, and both legs in the same position with a slight knee angle. | |
### Episode Termination | |
The episode will terminate if the hull gets in contact with the ground or | |
if the walker exceeds the right end of the terrain length. | |
### Arguments | |
To use to the _hardcore_ environment, you need to specify the | |
`hardcore=True` argument like below: | |
```python | |
import gym | |
env = gym.make("BipedalWalker-v3", hardcore=True) | |
``` | |
### Version History | |
- v3: returns closest lidar trace instead of furthest; | |
faster video recording | |
- v2: Count energy spent | |
- v1: Legs now report contact with ground; motors have higher torque and | |
speed; ground has higher friction; lidar rendered less nervously. | |
- v0: Initial version | |
<!-- ### References --> | |
### Credits | |
Created by Oleg Klimov | |
""" | |
metadata = { | |
"render_modes": ["human", "rgb_array"], | |
"render_fps": FPS, | |
} | |
def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False): | |
EzPickle.__init__(self, render_mode, hardcore) | |
self.isopen = True | |
self.world = Box2D.b2World() | |
self.terrain: List[Box2D.b2Body] = [] | |
self.hull: Optional[Box2D.b2Body] = None | |
self.prev_shaping = None | |
self.hardcore = hardcore | |
self.fd_polygon = fixtureDef( | |
shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]), | |
friction=FRICTION, | |
) | |
self.fd_edge = fixtureDef( | |
shape=edgeShape(vertices=[(0, 0), (1, 1)]), | |
friction=FRICTION, | |
categoryBits=0x0001, | |
) | |
# we use 5.0 to represent the joints moving at maximum | |
# 5 x the rated speed due to impulses from ground contact etc. | |
low = np.array( | |
[ | |
-math.pi, | |
-5.0, | |
-5.0, | |
-5.0, | |
-math.pi, | |
-5.0, | |
-math.pi, | |
-5.0, | |
-0.0, | |
-math.pi, | |
-5.0, | |
-math.pi, | |
-5.0, | |
-0.0, | |
] | |
+ [-1.0] * 10 | |
).astype(np.float32) | |
high = np.array( | |
[ | |
math.pi, | |
5.0, | |
5.0, | |
5.0, | |
math.pi, | |
5.0, | |
math.pi, | |
5.0, | |
5.0, | |
math.pi, | |
5.0, | |
math.pi, | |
5.0, | |
5.0, | |
] | |
+ [1.0] * 10 | |
).astype(np.float32) | |
self.action_space = spaces.Box( | |
np.array([-1, -1, -1, -1]).astype(np.float32), | |
np.array([1, 1, 1, 1]).astype(np.float32), | |
) | |
self.observation_space = spaces.Box(low, high) | |
# state = [ | |
# self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible. | |
# 2.0 * self.hull.angularVelocity / FPS, | |
# 0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range | |
# 0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS, | |
# self.joints[ | |
# 0 | |
# ].angle, # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too) | |
# self.joints[0].speed / SPEED_HIP, | |
# self.joints[1].angle + 1.0, | |
# self.joints[1].speed / SPEED_KNEE, | |
# 1.0 if self.legs[1].ground_contact else 0.0, | |
# self.joints[2].angle, | |
# self.joints[2].speed / SPEED_HIP, | |
# self.joints[3].angle + 1.0, | |
# self.joints[3].speed / SPEED_KNEE, | |
# 1.0 if self.legs[3].ground_contact else 0.0, | |
# ] | |
# state += [l.fraction for l in self.lidar] | |
self.render_mode = render_mode | |
self.screen: Optional[pygame.Surface] = None | |
self.clock = None | |
def _destroy(self): | |
if not self.terrain: | |
return | |
self.world.contactListener = None | |
for t in self.terrain: | |
self.world.DestroyBody(t) | |
self.terrain = [] | |
self.world.DestroyBody(self.hull) | |
self.hull = None | |
for leg in self.legs: | |
self.world.DestroyBody(leg) | |
self.legs = [] | |
self.joints = [] | |
def _generate_terrain(self, hardcore): | |
GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5) | |
state = GRASS | |
velocity = 0.0 | |
y = TERRAIN_HEIGHT | |
counter = TERRAIN_STARTPAD | |
oneshot = False | |
self.terrain = [] | |
self.terrain_x = [] | |
self.terrain_y = [] | |
stair_steps, stair_width, stair_height = 0, 0, 0 | |
original_y = 0 | |
for i in range(TERRAIN_LENGTH): | |
x = i * TERRAIN_STEP | |
self.terrain_x.append(x) | |
if state == GRASS and not oneshot: | |
velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y) | |
if i > TERRAIN_STARTPAD: | |
velocity += self.np_random.uniform(-1, 1) / SCALE # 1 | |
y += velocity | |
elif state == PIT and oneshot: | |
counter = self.np_random.integers(3, 5) | |
poly = [ | |
(x, y), | |
(x + TERRAIN_STEP, y), | |
(x + TERRAIN_STEP, y - 4 * TERRAIN_STEP), | |
(x, y - 4 * TERRAIN_STEP), | |
] | |
self.fd_polygon.shape.vertices = poly | |
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) | |
t.color1, t.color2 = (255, 255, 255), (153, 153, 153) | |
self.terrain.append(t) | |
self.fd_polygon.shape.vertices = [ | |
(p[0] + TERRAIN_STEP * counter, p[1]) for p in poly | |
] | |
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) | |
t.color1, t.color2 = (255, 255, 255), (153, 153, 153) | |
self.terrain.append(t) | |
counter += 2 | |
original_y = y | |
elif state == PIT and not oneshot: | |
y = original_y | |
if counter > 1: | |
y -= 4 * TERRAIN_STEP | |
elif state == STUMP and oneshot: | |
counter = self.np_random.integers(1, 3) | |
poly = [ | |
(x, y), | |
(x + counter * TERRAIN_STEP, y), | |
(x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP), | |
(x, y + counter * TERRAIN_STEP), | |
] | |
self.fd_polygon.shape.vertices = poly | |
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) | |
t.color1, t.color2 = (255, 255, 255), (153, 153, 153) | |
self.terrain.append(t) | |
elif state == STAIRS and oneshot: | |
stair_height = +1 if self.np_random.random() > 0.5 else -1 | |
stair_width = self.np_random.integers(4, 5) | |
stair_steps = self.np_random.integers(3, 5) | |
original_y = y | |
for s in range(stair_steps): | |
poly = [ | |
( | |
x + (s * stair_width) * TERRAIN_STEP, | |
y + (s * stair_height) * TERRAIN_STEP, | |
), | |
( | |
x + ((1 + s) * stair_width) * TERRAIN_STEP, | |
y + (s * stair_height) * TERRAIN_STEP, | |
), | |
( | |
x + ((1 + s) * stair_width) * TERRAIN_STEP, | |
y + (-1 + s * stair_height) * TERRAIN_STEP, | |
), | |
( | |
x + (s * stair_width) * TERRAIN_STEP, | |
y + (-1 + s * stair_height) * TERRAIN_STEP, | |
), | |
] | |
self.fd_polygon.shape.vertices = poly | |
t = self.world.CreateStaticBody(fixtures=self.fd_polygon) | |
t.color1, t.color2 = (255, 255, 255), (153, 153, 153) | |
self.terrain.append(t) | |
counter = stair_steps * stair_width | |
elif state == STAIRS and not oneshot: | |
s = stair_steps * stair_width - counter - stair_height | |
n = s / stair_width | |
y = original_y + (n * stair_height) * TERRAIN_STEP | |
oneshot = False | |
self.terrain_y.append(y) | |
counter -= 1 | |
if counter == 0: | |
counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS) | |
if state == GRASS and hardcore: | |
state = self.np_random.integers(1, _STATES_) | |
oneshot = True | |
else: | |
state = GRASS | |
oneshot = True | |
self.terrain_poly = [] | |
for i in range(TERRAIN_LENGTH - 1): | |
poly = [ | |
(self.terrain_x[i], self.terrain_y[i]), | |
(self.terrain_x[i + 1], self.terrain_y[i + 1]), | |
] | |
self.fd_edge.shape.vertices = poly | |
t = self.world.CreateStaticBody(fixtures=self.fd_edge) | |
color = (76, 255 if i % 2 == 0 else 204, 76) | |
t.color1 = color | |
t.color2 = color | |
self.terrain.append(t) | |
color = (102, 153, 76) | |
poly += [(poly[1][0], 0), (poly[0][0], 0)] | |
self.terrain_poly.append((poly, color)) | |
self.terrain.reverse() | |
def _generate_clouds(self): | |
# Sorry for the clouds, couldn't resist | |
self.cloud_poly = [] | |
for i in range(TERRAIN_LENGTH // 20): | |
x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP | |
y = VIEWPORT_H / SCALE * 3 / 4 | |
poly = [ | |
( | |
x | |
+ 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5) | |
+ self.np_random.uniform(0, 5 * TERRAIN_STEP), | |
y | |
+ 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5) | |
+ self.np_random.uniform(0, 5 * TERRAIN_STEP), | |
) | |
for a in range(5) | |
] | |
x1 = min(p[0] for p in poly) | |
x2 = max(p[0] for p in poly) | |
self.cloud_poly.append((poly, x1, x2)) | |
def reset( | |
self, | |
*, | |
seed: Optional[int] = None, | |
options: Optional[dict] = None, | |
): | |
super().reset(seed=seed) | |
self._destroy() | |
self.world.contactListener_bug_workaround = ContactDetector(self) | |
self.world.contactListener = self.world.contactListener_bug_workaround | |
self.game_over = False | |
self.prev_shaping = None | |
self.scroll = 0.0 | |
self.lidar_render = 0 | |
self._generate_terrain(self.hardcore) | |
self._generate_clouds() | |
init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2 | |
init_y = TERRAIN_HEIGHT + 2 * LEG_H | |
self.hull = self.world.CreateDynamicBody( | |
position=(init_x, init_y), fixtures=HULL_FD | |
) | |
self.hull.color1 = (127, 51, 229) | |
self.hull.color2 = (76, 76, 127) | |
self.hull.ApplyForceToCenter( | |
(self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True | |
) | |
self.legs: List[Box2D.b2Body] = [] | |
self.joints: List[Box2D.b2RevoluteJoint] = [] | |
for i in [-1, +1]: | |
leg = self.world.CreateDynamicBody( | |
position=(init_x, init_y - LEG_H / 2 - LEG_DOWN), | |
angle=(i * 0.05), | |
fixtures=LEG_FD, | |
) | |
leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25) | |
leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25) | |
rjd = revoluteJointDef( | |
bodyA=self.hull, | |
bodyB=leg, | |
localAnchorA=(0, LEG_DOWN), | |
localAnchorB=(0, LEG_H / 2), | |
enableMotor=True, | |
enableLimit=True, | |
maxMotorTorque=MOTORS_TORQUE, | |
motorSpeed=i, | |
lowerAngle=-0.8, | |
upperAngle=1.1, | |
) | |
self.legs.append(leg) | |
self.joints.append(self.world.CreateJoint(rjd)) | |
lower = self.world.CreateDynamicBody( | |
position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN), | |
angle=(i * 0.05), | |
fixtures=LOWER_FD, | |
) | |
lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25) | |
lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25) | |
rjd = revoluteJointDef( | |
bodyA=leg, | |
bodyB=lower, | |
localAnchorA=(0, -LEG_H / 2), | |
localAnchorB=(0, LEG_H / 2), | |
enableMotor=True, | |
enableLimit=True, | |
maxMotorTorque=MOTORS_TORQUE, | |
motorSpeed=1, | |
lowerAngle=-1.6, | |
upperAngle=-0.1, | |
) | |
lower.ground_contact = False | |
self.legs.append(lower) | |
self.joints.append(self.world.CreateJoint(rjd)) | |
self.drawlist = self.terrain + self.legs + [self.hull] | |
class LidarCallback(Box2D.b2.rayCastCallback): | |
def ReportFixture(self, fixture, point, normal, fraction): | |
if (fixture.filterData.categoryBits & 1) == 0: | |
return -1 | |
self.p2 = point | |
self.fraction = fraction | |
return fraction | |
self.lidar = [LidarCallback() for _ in range(10)] | |
if self.render_mode == "human": | |
self.render() | |
return self.step(np.array([0, 0, 0, 0]))[0], {} | |
def step(self, action: np.ndarray): | |
assert self.hull is not None | |
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help | |
control_speed = False # Should be easier as well | |
if control_speed: | |
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1)) | |
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1)) | |
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1)) | |
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1)) | |
else: | |
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0])) | |
self.joints[0].maxMotorTorque = float( | |
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1) | |
) | |
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1])) | |
self.joints[1].maxMotorTorque = float( | |
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1) | |
) | |
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2])) | |
self.joints[2].maxMotorTorque = float( | |
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1) | |
) | |
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3])) | |
self.joints[3].maxMotorTorque = float( | |
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1) | |
) | |
self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) | |
pos = self.hull.position | |
vel = self.hull.linearVelocity | |
for i in range(10): | |
self.lidar[i].fraction = 1.0 | |
self.lidar[i].p1 = pos | |
self.lidar[i].p2 = ( | |
pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE, | |
pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE, | |
) | |
self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2) | |
state = [ | |
self.hull.angle, # Normal angles up to 0.5 here, but sure more is possible. | |
2.0 * self.hull.angularVelocity / FPS, | |
0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS, # Normalized to get -1..1 range | |
0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS, | |
self.joints[0].angle, | |
# This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too) | |
self.joints[0].speed / SPEED_HIP, | |
self.joints[1].angle + 1.0, | |
self.joints[1].speed / SPEED_KNEE, | |
1.0 if self.legs[1].ground_contact else 0.0, | |
self.joints[2].angle, | |
self.joints[2].speed / SPEED_HIP, | |
self.joints[3].angle + 1.0, | |
self.joints[3].speed / SPEED_KNEE, | |
1.0 if self.legs[3].ground_contact else 0.0, | |
] | |
state += [l.fraction for l in self.lidar] | |
assert len(state) == 24 | |
self.scroll = pos.x - VIEWPORT_W / SCALE / 5 | |
shaping = ( | |
130 * pos[0] / SCALE | |
) # moving forward is a way to receive reward (normalized to get 300 on completion) | |
shaping -= 5.0 * abs( | |
state[0] | |
) # keep head straight, other than that and falling, any behavior is unpunished | |
reward = 0 | |
if self.prev_shaping is not None: | |
reward = shaping - self.prev_shaping | |
self.prev_shaping = shaping | |
for a in action: | |
reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1) | |
# normalized to about -50.0 using heuristic, more optimal agent should spend less | |
terminated = False | |
if self.game_over or pos[0] < 0: | |
reward = -100 | |
terminated = True | |
if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP: | |
terminated = True | |
if self.render_mode == "human": | |
self.render() | |
return np.array(state, dtype=np.float32), reward, terminated, False, {} | |
def render(self): | |
if self.render_mode is None: | |
gym.logger.warn( | |
"You are calling render method without specifying any render mode. " | |
"You can specify the render_mode at initialization, " | |
f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' | |
) | |
return | |
try: | |
import pygame | |
from pygame import gfxdraw | |
except ImportError: | |
raise DependencyNotInstalled( | |
"pygame is not installed, run `pip install gym[box2d]`" | |
) | |
if self.screen is None and self.render_mode == "human": | |
pygame.init() | |
pygame.display.init() | |
self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H)) | |
if self.clock is None: | |
self.clock = pygame.time.Clock() | |
self.surf = pygame.Surface( | |
(VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H) | |
) | |
pygame.transform.scale(self.surf, (SCALE, SCALE)) | |
pygame.draw.polygon( | |
self.surf, | |
color=(215, 215, 255), | |
points=[ | |
(self.scroll * SCALE, 0), | |
(self.scroll * SCALE + VIEWPORT_W, 0), | |
(self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H), | |
(self.scroll * SCALE, VIEWPORT_H), | |
], | |
) | |
for poly, x1, x2 in self.cloud_poly: | |
if x2 < self.scroll / 2: | |
continue | |
if x1 > self.scroll / 2 + VIEWPORT_W / SCALE: | |
continue | |
pygame.draw.polygon( | |
self.surf, | |
color=(255, 255, 255), | |
points=[ | |
(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly | |
], | |
) | |
gfxdraw.aapolygon( | |
self.surf, | |
[(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly], | |
(255, 255, 255), | |
) | |
for poly, color in self.terrain_poly: | |
if poly[1][0] < self.scroll: | |
continue | |
if poly[0][0] > self.scroll + VIEWPORT_W / SCALE: | |
continue | |
scaled_poly = [] | |
for coord in poly: | |
scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE]) | |
pygame.draw.polygon(self.surf, color=color, points=scaled_poly) | |
gfxdraw.aapolygon(self.surf, scaled_poly, color) | |
self.lidar_render = (self.lidar_render + 1) % 100 | |
i = self.lidar_render | |
if i < 2 * len(self.lidar): | |
single_lidar = ( | |
self.lidar[i] | |
if i < len(self.lidar) | |
else self.lidar[len(self.lidar) - i - 1] | |
) | |
if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"): | |
pygame.draw.line( | |
self.surf, | |
color=(255, 0, 0), | |
start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE), | |
end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE), | |
width=1, | |
) | |
for obj in self.drawlist: | |
for f in obj.fixtures: | |
trans = f.body.transform | |
if type(f.shape) is circleShape: | |
pygame.draw.circle( | |
self.surf, | |
color=obj.color1, | |
center=trans * f.shape.pos * SCALE, | |
radius=f.shape.radius * SCALE, | |
) | |
pygame.draw.circle( | |
self.surf, | |
color=obj.color2, | |
center=trans * f.shape.pos * SCALE, | |
radius=f.shape.radius * SCALE, | |
) | |
else: | |
path = [trans * v * SCALE for v in f.shape.vertices] | |
if len(path) > 2: | |
pygame.draw.polygon(self.surf, color=obj.color1, points=path) | |
gfxdraw.aapolygon(self.surf, path, obj.color1) | |
path.append(path[0]) | |
pygame.draw.polygon( | |
self.surf, color=obj.color2, points=path, width=1 | |
) | |
gfxdraw.aapolygon(self.surf, path, obj.color2) | |
else: | |
pygame.draw.aaline( | |
self.surf, | |
start_pos=path[0], | |
end_pos=path[1], | |
color=obj.color1, | |
) | |
flagy1 = TERRAIN_HEIGHT * SCALE | |
flagy2 = flagy1 + 50 | |
x = TERRAIN_STEP * 3 * SCALE | |
pygame.draw.aaline( | |
self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2) | |
) | |
f = [ | |
(x, flagy2), | |
(x, flagy2 - 10), | |
(x + 25, flagy2 - 5), | |
] | |
pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f) | |
pygame.draw.lines( | |
self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False | |
) | |
self.surf = pygame.transform.flip(self.surf, False, True) | |
if self.render_mode == "human": | |
assert self.screen is not None | |
self.screen.blit(self.surf, (-self.scroll * SCALE, 0)) | |
pygame.event.pump() | |
self.clock.tick(self.metadata["render_fps"]) | |
pygame.display.flip() | |
elif self.render_mode == "rgb_array": | |
return np.transpose( | |
np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2) | |
)[:, -VIEWPORT_W:] | |
def close(self): | |
if self.screen is not None: | |
import pygame | |
pygame.display.quit() | |
pygame.quit() | |
self.isopen = False | |
class BipedalWalkerHardcore: | |
def __init__(self): | |
raise error.Error( | |
"Error initializing BipedalWalkerHardcore Environment.\n" | |
"Currently, we do not support initializing this mode of environment by calling the class directly.\n" | |
"To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n" | |
'gym.make("BipedalWalker-v3", hardcore=True)' | |
) | |
if __name__ == "__main__": | |
# Heurisic: suboptimal, have no notion of balance. | |
env = BipedalWalker() | |
env.reset() | |
steps = 0 | |
total_reward = 0 | |
a = np.array([0.0, 0.0, 0.0, 0.0]) | |
STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3 | |
SPEED = 0.29 # Will fall forward on higher speed | |
state = STAY_ON_ONE_LEG | |
moving_leg = 0 | |
supporting_leg = 1 - moving_leg | |
SUPPORT_KNEE_ANGLE = +0.1 | |
supporting_knee_angle = SUPPORT_KNEE_ANGLE | |
while True: | |
s, r, terminated, truncated, info = env.step(a) | |
total_reward += r | |
if steps % 20 == 0 or terminated or truncated: | |
print("\naction " + str([f"{x:+0.2f}" for x in a])) | |
print(f"step {steps} total_reward {total_reward:+0.2f}") | |
print("hull " + str([f"{x:+0.2f}" for x in s[0:4]])) | |
print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]])) | |
print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]])) | |
steps += 1 | |
contact0 = s[8] | |
contact1 = s[13] | |
moving_s_base = 4 + 5 * moving_leg | |
supporting_s_base = 4 + 5 * supporting_leg | |
hip_targ = [None, None] # -0.8 .. +1.1 | |
knee_targ = [None, None] # -0.6 .. +0.9 | |
hip_todo = [0.0, 0.0] | |
knee_todo = [0.0, 0.0] | |
if state == STAY_ON_ONE_LEG: | |
hip_targ[moving_leg] = 1.1 | |
knee_targ[moving_leg] = -0.6 | |
supporting_knee_angle += 0.03 | |
if s[2] > SPEED: | |
supporting_knee_angle += 0.03 | |
supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE) | |
knee_targ[supporting_leg] = supporting_knee_angle | |
if s[supporting_s_base + 0] < 0.10: # supporting leg is behind | |
state = PUT_OTHER_DOWN | |
if state == PUT_OTHER_DOWN: | |
hip_targ[moving_leg] = +0.1 | |
knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE | |
knee_targ[supporting_leg] = supporting_knee_angle | |
if s[moving_s_base + 4]: | |
state = PUSH_OFF | |
supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE) | |
if state == PUSH_OFF: | |
knee_targ[moving_leg] = supporting_knee_angle | |
knee_targ[supporting_leg] = +1.0 | |
if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED: | |
state = STAY_ON_ONE_LEG | |
moving_leg = 1 - moving_leg | |
supporting_leg = 1 - moving_leg | |
if hip_targ[0]: | |
hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5] | |
if hip_targ[1]: | |
hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10] | |
if knee_targ[0]: | |
knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7] | |
if knee_targ[1]: | |
knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12] | |
hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait | |
hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1] | |
knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations | |
knee_todo[1] -= 15.0 * s[3] | |
a[0] = hip_todo[0] | |
a[1] = knee_todo[0] | |
a[2] = hip_todo[1] | |
a[3] = knee_todo[1] | |
a = np.clip(0.5 * a, -1.0, 1.0) | |
if terminated or truncated: | |
break | |