GameServerX

Running

App Files Files Community

GameServerX / MLPY /Lib /site-packages /gym /envs /box2d /car_racing.py

Kano001

Upload 919 files

375a1cf verified 3 months ago

raw

history blame contribute delete

28.9 kB

	__credits__ = ["Andrea PIERRÉ"]

	import math
	from typing import Optional, Union

	import numpy as np

	import gym
	from gym import spaces
	from gym.envs.box2d.car_dynamics import Car
	from gym.error import DependencyNotInstalled, InvalidAction
	from gym.utils import EzPickle

	try:
	import Box2D
	from Box2D.b2 import contactListener, fixtureDef, polygonShape
	except ImportError:
	raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`")

	try:
	# pygame is needed by reset() and step() even when no render mode is set,
	# so it is a required import for this environment.
	import pygame
	from pygame import gfxdraw
	except ImportError:
	raise DependencyNotInstalled(
	"pygame is not installed, run `pip install gym[box2d]`"
	)


	# Size of the observation image (see `observation_space` and the
	# "state_pixels" render mode).
	STATE_W = 96 # less than Atari 160x192
	STATE_H = 96
	# Size of the frames returned by the "rgb_array" render mode.
	VIDEO_W = 600
	VIDEO_H = 400
	# Size of the off-screen drawing surface and of the "human" window.
	WINDOW_W = 1000
	WINDOW_H = 800

	SCALE = 6.0 # Track scale
	TRACK_RAD = 900 / SCALE # Track is heavily morphed circle with this radius
	PLAYFIELD = 2000 / SCALE # Game over boundary
	FPS = 50 # Frames per second
	ZOOM = 2.7 # Camera zoom
	# NOTE(review): ZOOM_FOLLOW is not read anywhere in the visible code.
	ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom)


	# Distance advanced along the track for every generated track point.
	TRACK_DETAIL_STEP = 21 / SCALE
	# Upper bound on the heading change applied per generated track point.
	TRACK_TURN_RATE = 0.31
	# Offset from the track centre line to each road edge (i.e. half the road width).
	TRACK_WIDTH = 40 / SCALE
	# Width of the red/white border strip drawn outside the road on hard turns.
	BORDER = 8 / SCALE
	# Number of consecutive same-direction turning segments required for a border.
	BORDER_MIN_COUNT = 4
	# Side length of one square grass patch.
	GRASS_DIM = PLAYFIELD / 20.0
	# Margin (in pixels) added around the window when deciding whether a polygon
	# is worth drawing; derived from the largest of the shape dimensions above.
	MAX_SHAPE_DIM = (
	    max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE
	)


	class FrictionDetector(contactListener):
	    """Contact listener that records which road tiles a body is touching.

	    A "tile" is any body user-data object that has a ``road_friction``
	    attribute.  The other participant of the contact is only tracked when
	    its user data carries a ``tiles`` set (presumably a car wheel - confirm
	    against ``car_dynamics``).  The first time a tile is touched the
	    environment's reward and visited-tile counter are increased, and touching
	    tile 0 after enough of the track has been covered flags a new lap.
	    """

	    def __init__(self, env, lap_complete_percent):
	        """Store the owning environment and the lap completion threshold.

	        Args:
	            env: environment whose ``reward``, ``tile_visited_count``,
	                ``track``, ``road_color`` and ``new_lap`` are read/updated.
	            lap_complete_percent: fraction of tiles that must have been
	                visited before reaching tile 0 counts as a finished lap.
	        """
	        contactListener.__init__(self)
	        self.lap_complete_percent = lap_complete_percent
	        self.env = env

	    def BeginContact(self, contact):
	        """Handle the start of a contact."""
	        self._contact(contact, True)

	    def EndContact(self, contact):
	        """Handle the end of a contact."""
	        self._contact(contact, False)

	    def _contact(self, contact, begin):
	        """Update tile bookkeeping for a contact that begins or ends.

	        Args:
	            contact: Box2D contact; the user data of both fixture bodies is
	                inspected.
	            begin: True when the contact starts, False when it ends.
	        """
	        data_a = contact.fixtureA.body.userData
	        data_b = contact.fixtureB.body.userData

	        # Work out which side is the road tile.  Both orderings are checked
	        # and the later match wins, so fixture B takes precedence.
	        tile, obj = None, None
	        for candidate, partner in ((data_a, data_b), (data_b, data_a)):
	            if candidate and "road_friction" in candidate.__dict__:
	                tile, obj = candidate, partner
	        if not tile:
	            return

	        env = self.env
	        # inherit tile color from env
	        tile.color[:] = env.road_color
	        if not obj or "tiles" not in obj.__dict__:
	            return

	        if not begin:
	            obj.tiles.remove(tile)
	            return

	        obj.tiles.add(tile)
	        if tile.road_visited:
	            return

	        # First visit of this tile: pay out its share of the 1000 points.
	        tile.road_visited = True
	        env.reward += 1000.0 / len(env.track)
	        env.tile_visited_count += 1

	        # Lap is considered completed if enough % of the track was covered
	        if tile.idx != 0:
	            return
	        covered = env.tile_visited_count / len(env.track)
	        if covered > self.lap_complete_percent:
	            env.new_lap = True


	class CarRacing(gym.Env, EzPickle):
	"""
	### Description
	The easiest control task to learn from pixels - a top-down
	racing environment. The generated track is random every episode.

	Some indicators are shown at the bottom of the window along with the
	state RGB buffer. From left to right: true speed, four ABS sensors,
	steering wheel position, and gyroscope.
	To play yourself (it's rather fast for humans), type:
	```
	python gym/envs/box2d/car_racing.py
	```
	Remember: it's a powerful rear-wheel drive car - don't press the accelerator
	and turn at the same time.

	### Action Space
	If continuous:
	There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking.
	If discrete:
	There are 5 actions: do nothing, steer left, steer right, gas, brake.

	### Observation Space
	State consists of 96x96 pixels.

	### Rewards
	The reward is -0.1 every frame and +1000/N for every track tile visited,
	where N is the total number of tiles in the track. For example,
	if you have finished in 732 frames, your reward is
	1000 - 0.1*732 = 926.8 points.

	### Starting State
	The car starts at rest in the center of the road.

	### Episode Termination
	The episode finishes when all of the tiles are visited. The car can also go
	outside of the playfield - that is, far off the track, in which case it will
	receive -100 reward and die.

	### Arguments
	`lap_complete_percent` dictates the percentage of tiles that must be visited by
	the agent before a lap is considered complete.

	Passing `domain_randomize=True` enables the domain randomized variant of the environment.
	In this scenario, the background and track colours are different on every reset.

	Passing `continuous=False` converts the environment to use discrete action space.
	The discrete action space has 5 actions: [do nothing, left, right, gas, brake].

	### Reset Arguments
	Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
	Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
	`domain_randomize` must be `True` on init for this argument to work.
	Example usage:
	```py
	env = gym.make("CarRacing-v1", domain_randomize=True)

	# normal reset, this changes the colour scheme by default
	env.reset()

	# reset with colour scheme change
	env.reset(options={"randomize": True})

	# reset with no colour scheme change
	env.reset(options={"randomize": False})
	```

	### Version History
	- v1: Change track completion logic and add domain randomization (0.24.0)
	- v0: Original version

	### References
	- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

	### Credits
	Created by Oleg Klimov
	"""

	# Environment metadata: "render_modes" lists the modes `_render` accepts,
	# and "render_fps" is the rate the clock is ticked at in "human" mode.
	metadata = {
	    "render_modes": [
	        "human",
	        "rgb_array",
	        "state_pixels",
	    ],
	    "render_fps": FPS,
	}

	def __init__(
	    self,
	    render_mode: Optional[str] = None,
	    verbose: bool = False,
	    lap_complete_percent: float = 0.95,
	    domain_randomize: bool = False,
	    continuous: bool = True,
	):
	    """Create the physics world, colours, and action/observation spaces.

	    Args:
	        render_mode: one of ``metadata["render_modes"]`` or None.
	        verbose: print track generation diagnostics when True.
	        lap_complete_percent: fraction of tiles that must be visited
	            before a lap counts as complete.
	        domain_randomize: pick random road/background colours instead of
	            the default ones.
	        continuous: use the 3-dimensional Box action space when True,
	            otherwise a 5-way Discrete one.
	    """
	    EzPickle.__init__(
	        self,
	        render_mode,
	        verbose,
	        lap_complete_percent,
	        domain_randomize,
	        continuous,
	    )

	    # Configuration (must be stored before the colours are initialised).
	    self.continuous = continuous
	    self.domain_randomize = domain_randomize
	    self.lap_complete_percent = lap_complete_percent
	    self._init_colors()

	    # Physics world; the listener is also stored on self to keep a reference.
	    self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent)
	    self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)

	    # Rendering state, created lazily by the render code.
	    self.screen: Optional[pygame.Surface] = None
	    self.surf = None
	    self.clock = None
	    self.isopen = True
	    self.invisible_state_window = None
	    self.invisible_video_window = None

	    # Episode state.
	    self.road = None
	    self.car: Optional[Car] = None
	    self.reward = 0.0
	    self.prev_reward = 0.0
	    self.verbose = verbose
	    self.new_lap = False

	    # Reusable fixture definition for road tiles; its vertices are
	    # overwritten for every tile that gets created.
	    unit_square = [(0, 0), (1, 0), (1, -1), (0, -1)]
	    self.fd_tile = fixtureDef(shape=polygonShape(vertices=unit_square))

	    # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
	    # or normalised however this is not possible here so ignore
	    if not self.continuous:
	        # do nothing, left, right, gas, brake
	        self.action_space = spaces.Discrete(5)
	    else:
	        lower = np.array([-1, 0, 0]).astype(np.float32)
	        upper = np.array([+1, +1, +1]).astype(np.float32)
	        self.action_space = spaces.Box(lower, upper)  # steer, gas, brake

	    self.observation_space = spaces.Box(
	        low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
	    )

	    self.render_mode = render_mode

	def _destroy(self):
	if not self.road:
	return
	for t in self.road:
	self.world.DestroyBody(t)
	self.road = []
	assert self.car is not None
	self.car.destroy()

	def _init_colors(self):
	if self.domain_randomize:
	# domain randomize the bg and grass colour
	self.road_color = self.np_random.uniform(0, 210, size=3)

	self.bg_color = self.np_random.uniform(0, 210, size=3)

	self.grass_color = np.copy(self.bg_color)
	idx = self.np_random.integers(3)
	self.grass_color[idx] += 20
	else:
	# default colours
	self.road_color = np.array([102, 102, 102])
	self.bg_color = np.array([102, 204, 102])
	self.grass_color = np.array([102, 230, 102])

	def _reinit_colors(self, randomize):
	assert (
	self.domain_randomize
	), "domain_randomize must be True to use this function."

	if randomize:
	# domain randomize the bg and grass colour
	self.road_color = self.np_random.uniform(0, 210, size=3)

	self.bg_color = self.np_random.uniform(0, 210, size=3)

	self.grass_color = np.copy(self.bg_color)
	idx = self.np_random.integers(3)
	self.grass_color[idx] += 20

	def _create_track(self):
	    """Generate a random closed track and create its tile bodies.

	    Twelve checkpoints are placed around a circle with random angle and
	    radius, a path is traced from checkpoint to checkpoint with a limited
	    turn rate, and one closed loop is cut out of that path.  For every
	    point of the loop a static sensor body is created in ``self.world`` and
	    appended to ``self.road``; the matching polygons (plus red/white
	    borders on hard turns) are appended to ``self.road_poly``.

	    Side effects: sets ``self.start_alpha``, resets ``self.road`` to an
	    empty list and, on success, sets ``self.track`` to a list of
	    ``(alpha, beta, x, y)`` tuples.  ``self.road_poly`` must already exist.

	    Returns:
	        True when a track was built; False when no closed loop was found
	        or its two ends are too far apart, in which case the caller is
	        expected to call again.
	    """
	    CHECKPOINTS = 12

	    # Create checkpoints
	    checkpoints = []
	    for c in range(CHECKPOINTS):
	        noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
	        alpha = 2 * math.pi * c / CHECKPOINTS + noise
	        rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

	        # First and last checkpoints are pinned (no noise, fixed radius).
	        if c == 0:
	            alpha = 0
	            rad = 1.5 * TRACK_RAD
	        if c == CHECKPOINTS - 1:
	            alpha = 2 * math.pi * c / CHECKPOINTS
	            self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
	            rad = 1.5 * TRACK_RAD

	        checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
	    self.road = []

	    # Go from one checkpoint to another to create track
	    x, y, beta = 1.5 * TRACK_RAD, 0, 0
	    dest_i = 0
	    laps = 0
	    track = []
	    no_freeze = 2500  # hard cap on the number of generated points
	    visited_other_side = False
	    while True:
	        alpha = math.atan2(y, x)
	        # A lap is counted each time the polar angle returns to the
	        # positive side after having been negative.
	        if visited_other_side and alpha > 0:
	            laps += 1
	            visited_other_side = False
	        if alpha < 0:
	            visited_other_side = True
	            alpha += 2 * math.pi

	        while True:  # Find destination from checkpoints
	            failed = True

	            while True:
	                dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
	                if alpha <= dest_alpha:
	                    failed = False
	                    break
	                dest_i += 1
	                if dest_i % len(checkpoints) == 0:
	                    break

	            if not failed:
	                break

	            # Wrapped past the last checkpoint: retry one turn earlier.
	            alpha -= 2 * math.pi
	            continue

	        r1x = math.cos(beta)
	        r1y = math.sin(beta)
	        p1x = -r1y
	        p1y = r1x
	        dest_dx = dest_x - x  # vector towards destination
	        dest_dy = dest_y - y
	        # destination vector projected on rad:
	        proj = r1x * dest_dx + r1y * dest_dy
	        # Keep beta within 1.5*pi of alpha.
	        while beta - alpha > 1.5 * math.pi:
	            beta -= 2 * math.pi
	        while beta - alpha < -1.5 * math.pi:
	            beta += 2 * math.pi
	        prev_beta = beta
	        proj *= SCALE
	        # Turn towards the destination, by at most TRACK_TURN_RATE.
	        if proj > 0.3:
	            beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
	        if proj < -0.3:
	            beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
	        x += p1x * TRACK_DETAIL_STEP
	        y += p1y * TRACK_DETAIL_STEP
	        track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
	        if laps > 4:
	            break
	        no_freeze -= 1
	        if no_freeze == 0:
	            break

	    # Find closed loop range i1..i2, first loop should be ignored, second is OK
	    i1, i2 = -1, -1
	    i = len(track)
	    while True:
	        i -= 1
	        if i == 0:
	            return False  # Failed
	        pass_through_start = (
	            track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
	        )
	        if pass_through_start and i2 == -1:
	            i2 = i
	        elif pass_through_start and i1 == -1:
	            i1 = i
	            break
	    if self.verbose:
	        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
	    assert i1 != -1
	    assert i2 != -1

	    track = track[i1 : i2 - 1]

	    first_beta = track[0][1]
	    first_perp_x = math.cos(first_beta)
	    first_perp_y = math.sin(first_beta)
	    # Length of perpendicular jump to put together head and tail
	    well_glued_together = np.sqrt(
	        np.square(first_perp_x * (track[0][2] - track[-1][2]))
	        + np.square(first_perp_y * (track[0][3] - track[-1][3]))
	    )
	    if well_glued_together > TRACK_DETAIL_STEP:
	        return False

	    # Red-white border on hard turns
	    border = [False] * len(track)
	    for i in range(len(track)):
	        good = True
	        oneside = 0
	        for neg in range(BORDER_MIN_COUNT):
	            beta1 = track[i - neg - 0][1]
	            beta2 = track[i - neg - 1][1]
	            good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
	            oneside += np.sign(beta1 - beta2)
	        # All BORDER_MIN_COUNT segments must turn in the same direction.
	        good &= abs(oneside) == BORDER_MIN_COUNT
	        border[i] = good
	    # Extend every border backwards over the segments that produced it.
	    for i in range(len(track)):
	        for neg in range(BORDER_MIN_COUNT):
	            border[i - neg] |= border[i]

	    # Create tiles
	    for i in range(len(track)):
	        alpha1, beta1, x1, y1 = track[i]
	        alpha2, beta2, x2, y2 = track[i - 1]
	        road1_l = (
	            x1 - TRACK_WIDTH * math.cos(beta1),
	            y1 - TRACK_WIDTH * math.sin(beta1),
	        )
	        road1_r = (
	            x1 + TRACK_WIDTH * math.cos(beta1),
	            y1 + TRACK_WIDTH * math.sin(beta1),
	        )
	        road2_l = (
	            x2 - TRACK_WIDTH * math.cos(beta2),
	            y2 - TRACK_WIDTH * math.sin(beta2),
	        )
	        road2_r = (
	            x2 + TRACK_WIDTH * math.cos(beta2),
	            y2 + TRACK_WIDTH * math.sin(beta2),
	        )
	        vertices = [road1_l, road1_r, road2_r, road2_l]
	        self.fd_tile.shape.vertices = vertices
	        t = self.world.CreateStaticBody(fixtures=self.fd_tile)
	        # The body is its own user data, so the contact listener can read
	        # the attributes set below straight from `userData`.
	        t.userData = t
	        c = 0.01 * (i % 3) * 255
	        t.color = self.road_color + c
	        t.road_visited = False
	        t.road_friction = 1.0
	        t.idx = i
	        t.fixtures[0].sensor = True
	        self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
	        self.road.append(t)
	        if border[i]:
	            side = np.sign(beta2 - beta1)
	            b1_l = (
	                x1 + side * TRACK_WIDTH * math.cos(beta1),
	                y1 + side * TRACK_WIDTH * math.sin(beta1),
	            )
	            b1_r = (
	                x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
	                y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
	            )
	            b2_l = (
	                x2 + side * TRACK_WIDTH * math.cos(beta2),
	                y2 + side * TRACK_WIDTH * math.sin(beta2),
	            )
	            b2_r = (
	                x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
	                y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
	            )
	            self.road_poly.append(
	                (
	                    [b1_l, b1_r, b2_r, b2_l],
	                    (255, 255, 255) if i % 2 == 0 else (255, 0, 0),
	                )
	            )
	    self.track = track
	    return True

	def reset(
	    self,
	    *,
	    seed: Optional[int] = None,
	    options: Optional[dict] = None,
	):
	    """Start a new episode on a freshly generated track.

	    Args:
	        seed: forwarded to the base class ``reset``.
	        options: optional dict; when the environment uses domain
	            randomisation, ``options["randomize"]`` (default True)
	            controls whether a new colour scheme is drawn.

	    Returns:
	        A ``(observation, info)`` tuple, where the observation comes from
	        an initial ``step(None)`` and ``info`` is an empty dict.
	    """
	    super().reset(seed=seed)
	    self._destroy()
	    # A fresh listener is installed on every reset; it is also stored on
	    # the world object so that a reference to it is kept.
	    self.world.contactListener_bug_workaround = FrictionDetector(
	        self, self.lap_complete_percent
	    )
	    self.world.contactListener = self.world.contactListener_bug_workaround
	    self.reward = 0.0
	    self.prev_reward = 0.0
	    self.tile_visited_count = 0
	    self.t = 0.0
	    self.new_lap = False
	    self.road_poly = []

	    if self.domain_randomize:
	        randomize = True
	        if isinstance(options, dict):
	            randomize = options.get("randomize", True)

	        self._reinit_colors(randomize)

	    # Track generation can fail; keep trying until it succeeds.
	    while True:
	        success = self._create_track()
	        if success:
	            break
	        if self.verbose:
	            print(
	                "retry to generate track (normal if there are not many "
	                "instances of this message)"
	            )
	    # Place the car using the (beta, x, y) part of the first track point.
	    self.car = Car(self.world, *self.track[0][1:4])

	    if self.render_mode == "human":
	        self.render()
	    return self.step(None)[0], {}

	def step(self, action: Union[np.ndarray, int]):
	    """Apply an action, advance the simulation by one frame and score it.

	    Args:
	        action: ``[steer, gas, brake]`` in continuous mode, an integer in
	            0..4 in discrete mode, or None to advance without controls
	            or reward bookkeeping (used by ``reset``).

	    Returns:
	        ``(state, step_reward, terminated, truncated, info)`` where
	        ``state`` is the "state_pixels" rendering and ``info`` is an
	        empty dict.

	    Raises:
	        InvalidAction: in discrete mode, when ``action`` is not contained
	            in the action space.
	    """
	    assert self.car is not None
	    car = self.car
	    has_action = action is not None

	    if has_action and self.continuous:
	        car.steer(-action[0])
	        car.gas(action[1])
	        car.brake(action[2])
	    elif has_action:
	        if not self.action_space.contains(action):
	            raise InvalidAction(
	                f"you passed the invalid action `{action}`. "
	                f"The supported action_space is `{self.action_space}`"
	            )
	        car.steer(-0.6 * (action == 1) + 0.6 * (action == 2))
	        car.gas(0.2 * (action == 3))
	        car.brake(0.8 * (action == 4))

	    car.step(1.0 / FPS)
	    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
	    self.t += 1.0 / FPS

	    self.state = self._render("state_pixels")

	    step_reward = 0
	    terminated = False
	    truncated = False
	    if has_action:  # First step without action, called from reset()
	        self.reward -= 0.1
	        # We actually don't want to count fuel spent, we want car to be faster.
	        # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
	        self.car.fuel_spent = 0.0
	        step_reward = self.reward - self.prev_reward
	        self.prev_reward = self.reward

	        lap_finished = self.tile_visited_count == len(self.track) or self.new_lap
	        if lap_finished:
	            # Truncation due to finishing lap
	            # This should not be treated as a failure
	            # but like a timeout
	            truncated = True

	        x, y = self.car.hull.position
	        left_playfield = abs(x) > PLAYFIELD or abs(y) > PLAYFIELD
	        if left_playfield:
	            terminated = True
	            step_reward = -100

	    if self.render_mode == "human":
	        self.render()
	    return self.state, step_reward, terminated, truncated, {}

	def render(self):
	    """Render the environment using the mode chosen at construction.

	    Returns:
	        Whatever ``_render`` returns for ``self.render_mode``, or None
	        (after logging a warning) when no render mode was configured.
	    """
	    if self.render_mode is None:
	        # `spec` is only set when the env was created through the
	        # registry; fall back to the class name for direct construction.
	        spec = getattr(self, "spec", None)
	        if spec is not None:
	            example = f'gym.make("{spec.id}", render_mode="rgb_array")'
	        else:
	            example = f'{type(self).__name__}(render_mode="rgb_array")'
	        gym.logger.warn(
	            "You are calling render method without specifying any render mode. "
	            "You can specify the render_mode at initialization, "
	            f"e.g. {example}"
	        )
	        return None
	    return self._render(self.render_mode)

	def _render(self, mode: str):
	    """Draw the current frame and return it in the form `mode` asks for.

	    Args:
	        mode: one of ``metadata["render_modes"]``.

	    Returns:
	        None if ``reset`` has not been called yet; an image array for
	        "rgb_array" and "state_pixels"; ``self.isopen`` for "human".
	    """
	    assert mode in self.metadata["render_modes"]

	    pygame.font.init()
	    if mode == "human" and self.screen is None:
	        # The window is only opened for interactive rendering.
	        pygame.init()
	        pygame.display.init()
	        self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
	    if self.clock is None:
	        self.clock = pygame.time.Clock()

	    if "t" not in self.__dict__:
	        return  # reset() not called yet

	    self.surf = pygame.Surface((WINDOW_W, WINDOW_H))

	    assert self.car is not None
	    hull = self.car.hull
	    # computing transformations
	    angle = -hull.angle
	    # Animating first second zoom.
	    zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
	    scroll = pygame.math.Vector2(
	        (-(hull.position[0]) * zoom, -(hull.position[1]) * zoom)
	    )
	    offset = scroll.rotate_rad(angle)
	    trans = (WINDOW_W / 2 + offset[0], WINDOW_H / 4 + offset[1])

	    self._render_road(zoom, trans, angle)
	    # The last argument is False for the state-pixel modes.
	    extra_detail = mode not in ("state_pixels_list", "state_pixels")
	    self.car.draw(self.surf, zoom, trans, angle, extra_detail)

	    self.surf = pygame.transform.flip(self.surf, False, True)

	    # showing stats
	    self._render_indicators(WINDOW_W, WINDOW_H)

	    font = pygame.font.Font(pygame.font.get_default_font(), 42)
	    score = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
	    score_rect = score.get_rect()
	    score_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
	    self.surf.blit(score, score_rect)

	    if mode == "human":
	        pygame.event.pump()
	        self.clock.tick(self.metadata["render_fps"])
	        assert self.screen is not None
	        self.screen.fill(0)
	        self.screen.blit(self.surf, (0, 0))
	        pygame.display.flip()

	    if mode == "rgb_array":
	        return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
	    if mode == "state_pixels":
	        return self._create_image_array(self.surf, (STATE_W, STATE_H))
	    return self.isopen

	def _render_road(self, zoom, translation, angle):
	    """Draw the background, the grass patches and the road polygons.

	    Args:
	        zoom: scale factor passed on to ``_draw_colored_polygon``.
	        translation: (x, y) offset passed on to ``_draw_colored_polygon``.
	        angle: rotation in radians passed on to ``_draw_colored_polygon``.
	    """
	    surface = self.surf
	    bounds = PLAYFIELD

	    # draw background (never culled, it covers the whole playfield)
	    playfield = [
	        (bounds, bounds),
	        (bounds, -bounds),
	        (-bounds, -bounds),
	        (-bounds, bounds),
	    ]
	    self._draw_colored_polygon(
	        surface, playfield, self.bg_color, zoom, translation, angle, clip=False
	    )

	    # draw grass patches: every second cell of a 40x40 grid
	    for col in range(-20, 20, 2):
	        west = GRASS_DIM * col + 0
	        east = GRASS_DIM * col + GRASS_DIM
	        for row in range(-20, 20, 2):
	            south = GRASS_DIM * row + 0
	            north = GRASS_DIM * row + GRASS_DIM
	            patch = [(east, south), (west, south), (west, north), (east, north)]
	            self._draw_colored_polygon(
	                surface, patch, self.grass_color, zoom, translation, angle
	            )

	    # draw road
	    for outline, tint in self.road_poly:
	        # converting to pixel coordinates
	        points = [(pt[0], pt[1]) for pt in outline]
	        rgb = [int(channel) for channel in tint]
	        self._draw_colored_polygon(surface, points, rgb, zoom, translation, angle)

	def _render_indicators(self, W, H):
	    """Draw the black status bar and its gauges at the bottom of the frame.

	    Gauges, left to right: speed, one bar per wheel (angular velocity),
	    steering joint angle, and hull angular velocity.  A gauge is skipped
	    when the magnitude of its value is at most 1e-4.

	    Args:
	        W: width of the area, in pixels.
	        H: height of the area, in pixels.
	    """
	    s = W / 40.0
	    h = H / 40.0

	    # Black strip, five grid rows high, that the gauges sit on.
	    strip = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
	    pygame.draw.polygon(self.surf, color=(0, 0, 0), points=strip)

	    def vertical_ind(place, val):
	        # One-column bar rising (or falling) from the baseline at H - h.
	        top = H - (h + h * val)
	        base = H - h
	        return [
	            (place * s, top),
	            ((place + 1) * s, top),
	            ((place + 1) * s, base),
	            ((place + 0) * s, base),
	        ]

	    def horiz_ind(place, val):
	        # Two-row bar extending `val` columns sideways from `place`.
	        upper = H - 4 * h
	        lower = H - 2 * h
	        return [
	            ((place + 0) * s, upper),
	            ((place + val) * s, upper),
	            ((place + val) * s, lower),
	            ((place + 0) * s, lower),
	        ]

	    # simple wrapper to render if the indicator value is above a threshold
	    def render_if_min(value, points, color):
	        if abs(value) > 1e-4:
	            pygame.draw.polygon(self.surf, points=points, color=color)

	    assert self.car is not None
	    hull = self.car.hull
	    wheels = self.car.wheels

	    velocity = hull.linearVelocity
	    true_speed = np.sqrt(np.square(velocity[0]) + np.square(velocity[1]))
	    render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255))

	    # ABS sensors: wheels 0-3 occupy columns 7-10
	    wheel_colors = ((0, 0, 255), (0, 0, 255), (51, 0, 255), (51, 0, 255))
	    for wheel_no, wheel_color in enumerate(wheel_colors):
	        omega = wheels[wheel_no].omega
	        render_if_min(omega, vertical_ind(7 + wheel_no, 0.01 * omega), wheel_color)

	    # Steering position, read from the first wheel's joint.
	    steer_angle = wheels[0].joint.angle
	    render_if_min(steer_angle, horiz_ind(20, -10.0 * steer_angle), (0, 255, 0))

	    # Gyroscope: hull angular velocity.
	    spin = hull.angularVelocity
	    render_if_min(spin, horiz_ind(30, -0.8 * spin), (255, 0, 0))

	def _draw_colored_polygon(
	    self, surface, poly, color, zoom, translation, angle, clip=True
	):
	    """Rotate, scale and translate a polygon, then draw it filled.

	    Args:
	        surface: pygame surface to draw on.
	        poly: sequence of (x, y) vertices.
	        color: fill colour.
	        zoom: scale factor applied after rotation.
	        translation: (x, y) offset in pixels applied after scaling.
	        angle: rotation in radians applied to every vertex.
	        clip: when True, the polygon is skipped unless at least one
	            vertex lies inside the window enlarged by ``MAX_SHAPE_DIM``.
	    """
	    poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
	    poly = [
	        (c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly
	    ]
	    # This checks if the polygon is out of bounds of the screen, and we skip drawing if so.
	    # Instead of calculating exactly if the polygon and screen overlap,
	    # we simply check if the polygon is in a larger bounding box whose dimension
	    # is greater than the screen by MAX_SHAPE_DIM, which is the maximum
	    # diagonal length of an environment object
	    if not clip or any(
	        (-MAX_SHAPE_DIM <= coord[0] <= WINDOW_W + MAX_SHAPE_DIM)
	        and (-MAX_SHAPE_DIM <= coord[1] <= WINDOW_H + MAX_SHAPE_DIM)
	        for coord in poly
	    ):
	        # Draw on the surface that was passed in rather than always on
	        # self.surf, so the `surface` argument is honoured.
	        gfxdraw.aapolygon(surface, poly, color)
	        gfxdraw.filled_polygon(surface, poly, color)

	def _create_image_array(self, screen, size):
	    """Scale a surface to `size` and return its pixels as an array.

	    Args:
	        screen: pygame surface to convert.
	        size: target (width, height) in pixels.

	    Returns:
	        A copy of the scaled surface's pixels with the first two axes
	        swapped relative to what ``pygame.surfarray.pixels3d`` yields.
	    """
	    resized = pygame.transform.smoothscale(screen, size)
	    pixels = np.array(pygame.surfarray.pixels3d(resized))
	    return np.transpose(pixels, axes=(1, 0, 2))

	def close(self):
	    """Shut pygame down if a display window was opened.

	    Does nothing when no window exists (``self.screen`` is None).
	    """
	    if self.screen is None:
	        return
	    pygame.display.quit()
	    self.isopen = False
	    pygame.quit()


	if __name__ == "__main__":
	    # Manual play: arrow keys drive, Return restarts, Escape quits.
	    a = np.array([0.0, 0.0, 0.0])

	    # key -> (index into the action vector, value while the key is held)
	    key_bindings = {
	        pygame.K_LEFT: (0, -1.0),
	        pygame.K_RIGHT: (0, +1.0),
	        pygame.K_UP: (1, +1.0),
	        pygame.K_DOWN: (2, +0.8),  # set 1.0 for wheels to block to zero rotation
	    }

	    def register_input():
	        """Translate pending pygame events into action values and flags."""
	        # `should_quit` is used instead of `quit` to avoid shadowing the builtin.
	        global should_quit, restart
	        for event in pygame.event.get():
	            if event.type == pygame.KEYDOWN:
	                if event.key in key_bindings:
	                    slot, value = key_bindings[event.key]
	                    a[slot] = value
	                elif event.key == pygame.K_RETURN:
	                    restart = True
	                elif event.key == pygame.K_ESCAPE:
	                    should_quit = True
	            elif event.type == pygame.KEYUP:
	                if event.key in key_bindings:
	                    a[key_bindings[event.key][0]] = 0
	            elif event.type == pygame.QUIT:
	                should_quit = True

	    env = CarRacing(render_mode="human")

	    should_quit = False
	    try:
	        while not should_quit:
	            env.reset()
	            total_reward = 0.0
	            steps = 0
	            restart = False
	            while True:
	                register_input()
	                s, r, terminated, truncated, info = env.step(a)
	                total_reward += r
	                if steps % 200 == 0 or terminated or truncated:
	                    print("\naction " + str([f"{x:+0.2f}" for x in a]))
	                    print(f"step {steps} total_reward {total_reward:+0.2f}")
	                steps += 1
	                if terminated or truncated or restart or should_quit:
	                    break
	    finally:
	        # Release the window even if the loop above raised.
	        env.close()