__credits__ = ["Andrea PIERRÉ"]

import math
from typing import Optional, Union

import numpy as np

import gym
from gym import spaces
from gym.envs.box2d.car_dynamics import Car
from gym.error import DependencyNotInstalled, InvalidAction
from gym.utils import EzPickle

try:
    import Box2D
    from Box2D.b2 import contactListener, fixtureDef, polygonShape
except ImportError:
    raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`")

try:
    # As pygame is necessary for using the environment (reset and step) even
    # without a render mode, pygame is a mandatory import for the environment.
    import pygame
    from pygame import gfxdraw
except ImportError:
    raise DependencyNotInstalled(
        "pygame is not installed, run `pip install gym[box2d]`"
    )


STATE_W = 96  # less than Atari 160x192
STATE_H = 96
VIDEO_W = 600
VIDEO_H = 400
WINDOW_W = 1000
WINDOW_H = 800

SCALE = 6.0  # Track scale
TRACK_RAD = 900 / SCALE  # Track is heavily morphed circle with this radius
PLAYFIELD = 2000 / SCALE  # Game over boundary
FPS = 50  # Frames per second
ZOOM = 2.7  # Camera zoom
ZOOM_FOLLOW = True  # Set to False for fixed view (don't use zoom)


TRACK_DETAIL_STEP = 21 / SCALE
TRACK_TURN_RATE = 0.31
TRACK_WIDTH = 40 / SCALE
BORDER = 8 / SCALE
BORDER_MIN_COUNT = 4
GRASS_DIM = PLAYFIELD / 20.0
# Upper bound (in pixels) on the diagonal of any drawn environment object;
# used as the off-screen margin when deciding whether to skip drawing a polygon.
MAX_SHAPE_DIM = (
    max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE
)


class FrictionDetector(contactListener):
    """Box2D contact listener that tracks which road tiles an object touches.

    On first contact with an unvisited tile it credits the environment with
    reward and flags a completed lap once enough of the track was covered.
    """

    def __init__(self, env, lap_complete_percent):
        """Store the owning environment and the lap-completion threshold."""
        contactListener.__init__(self)
        self.env = env
        self.lap_complete_percent = lap_complete_percent

    def BeginContact(self, contact):
        """Box2D callback: two fixtures started touching."""
        self._contact(contact, True)

    def EndContact(self, contact):
        """Box2D callback: two fixtures stopped touching."""
        self._contact(contact, False)

    def _contact(self, contact, begin):
        """Handle a begin/end contact between a road tile and another body.

        Args:
            contact: Box2D contact whose two fixtures' body ``userData`` are inspected.
            begin: ``True`` for a begin-contact event, ``False`` for end-contact.
        """
        tile = None
        obj = None
        u1 = contact.fixtureA.body.userData
        u2 = contact.fixtureB.body.userData
        # A road tile is recognised by carrying a `road_friction` attribute.
        if u1 and "road_friction" in u1.__dict__:
            tile = u1
            obj = u2
        if u2 and "road_friction" in u2.__dict__:
            tile = u2
            obj = u1
        if not tile:
            return

        # inherit tile color from env
        tile.color[:] = self.env.road_color
        # Only objects that keep a `tiles` set take part in tile bookkeeping.
        if not obj or "tiles" not in obj.__dict__:
            return
        if begin:
            obj.tiles.add(tile)
            if not tile.road_visited:
                tile.road_visited = True
                self.env.reward += 1000.0 / len(self.env.track)
                self.env.tile_visited_count += 1

                # Lap is considered completed if enough % of the track was covered
                if (
                    tile.idx == 0
                    and self.env.tile_visited_count / len(self.env.track)
                    > self.lap_complete_percent
                ):
                    self.env.new_lap = True
        else:
            obj.tiles.remove(tile)


class CarRacing(gym.Env, EzPickle):
    """
    ### Description
    The easiest control task to learn from pixels - a top-down
    racing environment. The generated track is random every episode.

    Some indicators are shown at the bottom of the window along with the
    state RGB buffer. From left to right: true speed, four ABS sensors,
    steering wheel position, and gyroscope.
    To play yourself (it's rather fast for humans), type:
    ```
    python gym/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
    and turn at the same time.

    ### Action Space
    If continuous:
        There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking.
    If discrete:
        There are 5 actions: do nothing, steer left, steer right, gas, brake.

    ### Observation Space
    State consists of 96x96 pixels.

    ### Rewards
    The reward is -0.1 every frame and +1000/N for every track tile visited,
    where N is the total number of tiles in the track. For example, if you have
    finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.

    ### Starting State
    The car starts at rest in the center of the road.

    ### Episode Termination
    The episode finishes when all of the tiles are visited. The car can also go
    outside of the playfield - that is, far off the track, in which case it will
    receive -100 reward and die.

    ### Arguments
    `lap_complete_percent` dictates the percentage of tiles that must be visited by
    the agent before a lap is considered complete.

    Passing `domain_randomize=True` enables the domain randomized variant of the environment.
    In this scenario, the background and track colours are different on every reset.

    Passing `continuous=False` converts the environment to use discrete action space.
    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].

    ### Reset Arguments
    Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
    Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
    `domain_randomize` must be `True` on init for this argument to work.
    Example usage:
    ```py
        env = gym.make("CarRacing-v1", domain_randomize=True)

        # normal reset, this changes the colour scheme by default
        env.reset()

        # reset with colour scheme change
        env.reset(options={"randomize": True})

        # reset with no colour scheme change
        env.reset(options={"randomize": False})
    ```

    ### Version History
    - v1: Change track completion logic and add domain randomization (0.24.0)
    - v0: Original version

    ### References
    - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "state_pixels",
        ],
        "render_fps": FPS,
    }

    def __init__(
        self,
        render_mode: Optional[str] = None,
        verbose: bool = False,
        lap_complete_percent: float = 0.95,
        domain_randomize: bool = False,
        continuous: bool = True,
    ):
        """Build the physics world, colour scheme and action/observation spaces.

        Args:
            render_mode: One of ``metadata["render_modes"]`` or ``None``.
            verbose: If ``True``, print track-generation diagnostics.
            lap_complete_percent: Fraction of tiles that must be visited before
                crossing tile 0 counts as a completed lap.
            domain_randomize: If ``True``, sample random road/background colours.
            continuous: If ``True`` use a 3-dim ``Box`` action space, otherwise
                a ``Discrete(5)`` one.
        """
        EzPickle.__init__(
            self,
            render_mode,
            verbose,
            lap_complete_percent,
            domain_randomize,
            continuous,
        )
        self.continuous = continuous
        self.domain_randomize = domain_randomize
        self.lap_complete_percent = lap_complete_percent
        self._init_colors()

        self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent)
        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
        self.screen: Optional[pygame.Surface] = None
        self.surf = None
        self.clock = None
        self.isopen = True
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car: Optional[Car] = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.new_lap = False
        # Template fixture; its vertices are overwritten for each road tile.
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )

        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py
        # as the space is not symmetric or normalised; however this is not
        # possible here so ignore
        if self.continuous:
            self.action_space = spaces.Box(
                np.array([-1, 0, 0]).astype(np.float32),
                np.array([+1, +1, +1]).astype(np.float32),
            )  # steer, gas, brake
        else:
            self.action_space = spaces.Discrete(5)
            # do nothing, left, right, gas, brake

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

        self.render_mode = render_mode

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op if no road exists."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        assert self.car is not None
        self.car.destroy()

    def _randomize_colors(self):
        """Sample a random road colour and a background/grass colour pair.

        The grass colour is the background colour with one randomly chosen
        channel raised by 20. The order of the random draws (road, background,
        channel index) is kept fixed so seeded runs stay reproducible.
        """
        self.road_color = self.np_random.uniform(0, 210, size=3)

        self.bg_color = self.np_random.uniform(0, 210, size=3)

        self.grass_color = np.copy(self.bg_color)
        idx = self.np_random.integers(3)
        self.grass_color[idx] += 20

    def _init_colors(self):
        """Set the initial colours: randomized if enabled, otherwise defaults."""
        if self.domain_randomize:
            # domain randomize the bg and grass colour
            self._randomize_colors()
        else:
            # default colours
            self.road_color = np.array([102, 102, 102])
            self.bg_color = np.array([102, 204, 102])
            self.grass_color = np.array([102, 230, 102])

    def _reinit_colors(self, randomize):
        """Re-sample the colour scheme when ``randomize`` is truthy.

        Requires the environment to have been created with
        ``domain_randomize=True``.
        """
        assert (
            self.domain_randomize
        ), "domain_randomize must be True to use this function."

        if randomize:
            # domain randomize the bg and grass colour
            self._randomize_colors()

    def _create_track(self):
        """Generate a random closed track and create its road tile bodies.

        Returns:
            ``True`` on success. ``False`` when no closed loop could be found
            or the loop's head and tail do not line up; the caller is expected
            to retry.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so generation cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                # Wrapped around all checkpoints: retry one full turn earlier.
                alpha -= 2 * math.pi

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]
        if not track:
            # Degenerate loop with no tiles: report failure so the caller
            # retries instead of crashing on track[0] below.
            return False

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        # NOTE: negative indices below deliberately wrap around the closed loop.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3) * 255
            t.color = self.road_color + c
            t.road_visited = False
            t.road_friction = 1.0
            t.idx = i
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    (
                        [b1_l, b1_r, b2_r, b2_l],
                        (255, 255, 255) if i % 2 == 0 else (255, 0, 0),
                    )
                )
        self.track = track
        return True

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Start a new episode with a freshly generated track.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Optional dict; when ``domain_randomize`` is enabled, the
                ``"randomize"`` key controls whether colours are re-sampled
                (defaults to ``True``).

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self._destroy()
        self.world.contactListener_bug_workaround = FrictionDetector(
            self, self.lap_complete_percent
        )
        self.world.contactListener = self.world.contactListener_bug_workaround
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.new_lap = False
        self.road_poly = []

        if self.domain_randomize:
            randomize = True
            if isinstance(options, dict):
                if "randomize" in options:
                    randomize = options["randomize"]

            self._reinit_colors(randomize)

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose:
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        if self.render_mode == "human":
            self.render()
        # A step with `action=None` advances physics once without applying
        # controls or reward, yielding the initial observation.
        return self.step(None)[0], {}

    def step(self, action: Union[np.ndarray, int]):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]`` array in continuous mode, an int in
                ``[0, 5)`` in discrete mode, or ``None`` (used by ``reset``) to
                step without applying controls or reward.

        Returns:
            ``(state, step_reward, terminated, truncated, info)``.

        Raises:
            InvalidAction: In discrete mode, if ``action`` is not in the
                action space.
        """
        assert self.car is not None
        if action is not None:
            if self.continuous:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])
            else:
                if not self.action_space.contains(action):
                    raise InvalidAction(
                        f"you passed the invalid action `{action}`. "
                        f"The supported action_space is `{self.action_space}`"
                    )
                self.car.steer(-0.6 * (action == 1) + 0.6 * (action == 2))
                self.car.gas(0.2 * (action == 3))
                self.car.brake(0.8 * (action == 4))

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self._render("state_pixels")

        step_reward = 0
        terminated = False
        truncated = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track) or self.new_lap:
                # Truncation due to finishing lap
                # This should not be treated as a failure
                # but like a timeout
                truncated = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                terminated = True
                step_reward = -100

        if self.render_mode == "human":
            self.render()
        return self.state, step_reward, terminated, truncated, {}

    def render(self):
        """Render using the mode chosen at construction.

        Returns the result of ``_render`` for the configured mode, or ``None``
        (after logging a warning) when no render mode was specified.
        """
        if self.render_mode is None:
            # `spec` is None when the env is instantiated directly rather than
            # through the registry, so fall back to the documented id.
            spec = getattr(self, "spec", None)
            env_id = spec.id if spec is not None else "CarRacing-v1"
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym.make("{env_id}", render_mode="rgb_array")'
            )
        else:
            return self._render(self.render_mode)

    def _render(self, mode: str):
        """Draw the scene and return output appropriate for ``mode``.

        Returns:
            An image array for ``"rgb_array"`` / ``"state_pixels"``,
            ``self.isopen`` for ``"human"``, or ``None`` if ``reset()`` has not
            been called yet.
        """
        assert mode in self.metadata["render_modes"]

        pygame.font.init()
        if self.screen is None and mode == "human":
            pygame.init()
            pygame.display.init()
            self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        self.surf = pygame.Surface((WINDOW_W, WINDOW_H))

        assert self.car is not None
        # computing transformations
        angle = -self.car.hull.angle
        # Animating first second zoom.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = -(self.car.hull.position[0]) * zoom
        scroll_y = -(self.car.hull.position[1]) * zoom
        trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
        trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])

        self._render_road(zoom, trans, angle)
        self.car.draw(
            self.surf,
            zoom,
            trans,
            angle,
            mode not in ["state_pixels_list", "state_pixels"],
        )

        self.surf = pygame.transform.flip(self.surf, False, True)

        # showing stats
        self._render_indicators(WINDOW_W, WINDOW_H)

        font = pygame.font.Font(pygame.font.get_default_font(), 42)
        text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
        text_rect = text.get_rect()
        text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
        self.surf.blit(text, text_rect)

        if mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            assert self.screen is not None
            self.screen.fill(0)
            self.screen.blit(self.surf, (0, 0))
            pygame.display.flip()

        if mode == "rgb_array":
            return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
        elif mode == "state_pixels":
            return self._create_image_array(self.surf, (STATE_W, STATE_H))
        else:
            return self.isopen

    def _render_road(self, zoom, translation, angle):
        """Draw the background, the grass patches and all road polygons."""
        bounds = PLAYFIELD
        field = [
            (bounds, bounds),
            (bounds, -bounds),
            (-bounds, -bounds),
            (-bounds, bounds),
        ]

        # draw background
        self._draw_colored_polygon(
            self.surf, field, self.bg_color, zoom, translation, angle, clip=False
        )

        # draw grass patches
        grass = []
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                grass.append(
                    [
                        (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + 0),
                        (GRASS_DIM * x + 0, GRASS_DIM * y + 0),
                        (GRASS_DIM * x + 0, GRASS_DIM * y + GRASS_DIM),
                        (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + GRASS_DIM),
                    ]
                )
        for poly in grass:
            self._draw_colored_polygon(
                self.surf, poly, self.grass_color, zoom, translation, angle
            )

        # draw road
        for poly, color in self.road_poly:
            # converting to pixel coordinates
            poly = [(p[0], p[1]) for p in poly]
            color = [int(c) for c in color]
            self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle)

    def _render_indicators(self, W, H):
        """Draw the bottom status bar and its indicator gauges.

        Args:
            W: Width in pixels of the surface being drawn on.
            H: Height in pixels of the surface being drawn on.
        """
        s = W / 40.0
        h = H / 40.0
        color = (0, 0, 0)
        polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
        pygame.draw.polygon(self.surf, color=color, points=polygon)

        def vertical_ind(place, val):
            return [
                (place * s, H - (h + h * val)),
                ((place + 1) * s, H - (h + h * val)),
                ((place + 1) * s, H - h),
                ((place + 0) * s, H - h),
            ]

        def horiz_ind(place, val):
            return [
                ((place + 0) * s, H - 4 * h),
                ((place + val) * s, H - 4 * h),
                ((place + val) * s, H - 2 * h),
                ((place + 0) * s, H - 2 * h),
            ]

        assert self.car is not None
        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        # simple wrapper to render if the indicator value is above a threshold
        def render_if_min(value, points, color):
            if abs(value) > 1e-4:
                pygame.draw.polygon(self.surf, points=points, color=color)

        render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255))
        # ABS sensors
        render_if_min(
            self.car.wheels[0].omega,
            vertical_ind(7, 0.01 * self.car.wheels[0].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[1].omega,
            vertical_ind(8, 0.01 * self.car.wheels[1].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[2].omega,
            vertical_ind(9, 0.01 * self.car.wheels[2].omega),
            (51, 0, 255),
        )
        render_if_min(
            self.car.wheels[3].omega,
            vertical_ind(10, 0.01 * self.car.wheels[3].omega),
            (51, 0, 255),
        )

        render_if_min(
            self.car.wheels[0].joint.angle,
            horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
            (0, 255, 0),
        )
        render_if_min(
            self.car.hull.angularVelocity,
            horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
            (255, 0, 0),
        )

    def _draw_colored_polygon(
        self, surface, poly, color, zoom, translation, angle, clip=True
    ):
        """Rotate, scale and translate ``poly``, then draw it filled on ``self.surf``.

        With ``clip=True`` the polygon is skipped when none of its vertices
        fall within the window enlarged by ``MAX_SHAPE_DIM`` on every side.
        NOTE(review): the ``surface`` argument is accepted but drawing always
        targets ``self.surf``; every call in this file passes ``self.surf``.
        """
        poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
        poly = [
            (c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly
        ]
        # This checks if the polygon is out of bounds of the screen, and we skip drawing if so.
        # Instead of calculating exactly if the polygon and screen overlap,
        # we simply check if the polygon is in a larger bounding box whose dimension
        # is greater than the screen by MAX_SHAPE_DIM, which is the maximum
        # diagonal length of an environment object
        if not clip or any(
            (-MAX_SHAPE_DIM <= coord[0] <= WINDOW_W + MAX_SHAPE_DIM)
            and (-MAX_SHAPE_DIM <= coord[1] <= WINDOW_H + MAX_SHAPE_DIM)
            for coord in poly
        ):
            gfxdraw.aapolygon(self.surf, poly, color)
            gfxdraw.filled_polygon(self.surf, poly, color)

    def _create_image_array(self, screen, size):
        """Scale ``screen`` to ``size`` and return its pixels as an array.

        The first two axes of the pygame pixel array are swapped before
        returning.
        """
        scaled_screen = pygame.transform.smoothscale(screen, size)
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2)
        )

    def close(self):
        """Shut down the pygame display (if one was opened) and pygame itself."""
        if self.screen is not None:
            pygame.display.quit()
            self.isopen = False
            pygame.quit()


if __name__ == "__main__":
    # Manual play: arrow keys drive, RETURN restarts the episode, ESC quits.
    a = np.array([0.0, 0.0, 0.0])

    def register_input():
        """Translate pending pygame events into the shared action array/flags."""
        global quit, restart
        for event in pygame.event.get():
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_LEFT:
                    a[0] = -1.0
                if event.key == pygame.K_RIGHT:
                    a[0] = +1.0
                if event.key == pygame.K_UP:
                    a[1] = +1.0
                if event.key == pygame.K_DOWN:
                    a[2] = +0.8  # set 1.0 for wheels to block to zero rotation
                if event.key == pygame.K_RETURN:
                    restart = True
                if event.key == pygame.K_ESCAPE:
                    quit = True

            if event.type == pygame.KEYUP:
                if event.key == pygame.K_LEFT:
                    a[0] = 0
                if event.key == pygame.K_RIGHT:
                    a[0] = 0
                if event.key == pygame.K_UP:
                    a[1] = 0
                if event.key == pygame.K_DOWN:
                    a[2] = 0

            if event.type == pygame.QUIT:
                quit = True

    env = CarRacing(render_mode="human")

    quit = False
    while not quit:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            register_input()
            s, r, terminated, truncated, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or terminated or truncated:
                print("\naction " + str([f"{x:+0.2f}" for x in a]))
                print(f"step {steps} total_reward {total_reward:+0.2f}")
            steps += 1
            if terminated or truncated or restart or quit:
                break
    env.close()