Spaces:
Sleeping
Sleeping
from minigrid.core.grid import Grid | |
from minigrid.core.mission import MissionSpace | |
from minigrid.minigrid_env import * | |
from minigrid.utils.rendering import * | |
from minigrid.core.world_object import WorldObj | |
import random | |
class NoisyTVEnv(MiniGridEnv):
    """
    ### Description

    Classic four room reinforcement learning environment with noisy tiles
    (a "noisy TV" distraction). The agent must navigate in a maze composed of
    four rooms interconnected by 4 gaps in the walls. To obtain a reward, the
    agent must reach the green goal square. Both the agent and the goal square
    are randomly placed in any of the four rooms unless `agent_pos` /
    `goal_pos` are given.

    ### Mission Space

    "reach the goal"

    ### Action Space

    | Num | Name         | Action       |
    |-----|--------------|--------------|
    | 0   | left         | Turn left    |
    | 1   | right        | Turn right   |
    | 2   | forward      | Move forward |
    | 3   | pickup       | Unused       |
    | 4   | drop         | Unused       |
    | 5   | toggle       | Unused       |
    | 6   | done         | Unused       |

    ### Observation Encoding

    - Each tile is encoded as a 3 dimensional tuple:
        `(OBJECT_IDX, COLOR_IDX, STATE)`
    - `OBJECT_TO_IDX` and `COLOR_TO_IDX` mapping can be found in
        [minigrid/minigrid.py](minigrid/minigrid.py)
    - `STATE` refers to the door state with 0=open, 1=closed and 2=locked
    - `obs['image']` is converted to float, and 0.5 is added to the
      `COLOR_IDX` channel of every noisy tile that is inside the agent's view.

    ### Rewards

    A reward of '1' is given for success, and '0' for failure.
    A small extra reward, sampled with `_rand_float(0.05, 0.1)` (a bounded
    range, not a Gaussian), is given whenever a forward action leaves the
    agent standing on a noisy tile. It is added on top of the goal reward if
    the goal happens to sit on a noisy tile.

    ### Termination

    The episode ends if any one of the following conditions is met:

    1. The agent reaches the goal.
    2. Timeout (see `max_steps`).

    ### Registered Configurations

    - `MiniGrid-NoisyTV-v0`

    """

    def __init__(self, agent_pos=None, goal_pos=None, noisy_tile_num=4, max_steps=100, **kwargs):
        """
        Build the environment and choose the noisy tile positions.

        Args:
            agent_pos: fixed `(x, y)` start cell, or None for a random start.
            goal_pos: fixed `(x, y)` goal cell, or None for a random goal.
            noisy_tile_num: number of distinct noisy tiles to sample.
            max_steps: step budget before the episode is truncated.
            **kwargs: forwarded unchanged to `MiniGridEnv.__init__`.

        Raises:
            ValueError: if more noisy tiles are requested than there are
                cells inside the outer wall.
        """
        self._agent_default_pos = agent_pos
        self._goal_default_pos = goal_pos
        self.size = 19
        self._noisy_tile_num = noisy_tile_num

        # The rejection-sampling loop below would never finish if more
        # distinct positions were requested than exist.
        interior_cells = (self.size - 2) ** 2
        if noisy_tile_num > interior_cells:
            raise ValueError(
                f"noisy_tile_num={noisy_tile_num} exceeds the {interior_cells} "
                "cells available inside the outer wall"
            )

        # NOTE(review): positions are sampled once, before super().__init__(),
        # so they rely on the base class exposing a usable RNG at this point
        # and are not re-drawn by reset(seed=...) -- confirm this is intended.
        # A sampled position may coincide with an interior wall cell.
        self._noisy_tile_pos = []
        for _ in range(self._noisy_tile_num):
            candidate = self._rand_interior_pos()
            while candidate in self._noisy_tile_pos:
                candidate = self._rand_interior_pos()
            self._noisy_tile_pos.append(candidate)

        mission_space = MissionSpace(mission_func=lambda: "reach the goal")
        super().__init__(
            mission_space=mission_space,
            width=self.size,
            height=self.size,
            max_steps=max_steps,
            **kwargs,
        )

    def _rand_interior_pos(self):
        """
        Sample an `(x, y)` cell with both coordinates from `_rand_int(1, size - 1)`
        """
        # x is drawn before y, matching the original sampling order.
        x = int(self._rand_int(1, self.size - 1))
        y = int(self._rand_int(1, self.size - 1))
        return (x, y)

    def _reward_noise(self):
        """
        Compute the reward to be given upon reaching a noisy tile
        """
        return self._rand_float(0.05, 0.1)

    def _add_noise(self, obs):
        """
        Add noise to obs['image']

        The image is converted to float and, for every noisy tile currently
        in the agent's view, 0.5 is added to channel 1 of that tile.
        The same `obs` dict is updated and returned.
        """
        image = obs['image'].astype(float)
        for x, y in self._noisy_tile_pos:
            # Only tiles inside the agent's view have a slot in the image.
            if self.in_view(x, y):
                view_pos = self.relative_coords(x, y)
                # Tuple indexing yields a view, so this updates `image` in place.
                image[view_pos][1] += 0.5
        obs['image'] = image
        return obs

    def _gen_grid(self, width, height):
        """
        Build the four-room layout, then place the agent and the goal.
        """
        # Create the grid
        self.grid = Grid(width, height)

        # Generate the surrounding walls
        self.grid.horz_wall(0, 0)
        self.grid.horz_wall(0, height - 1)
        self.grid.vert_wall(0, 0)
        self.grid.vert_wall(width - 1, 0)

        room_w = width // 2
        room_h = height // 2

        # For each row of rooms
        for j in range(0, 2):
            # For each column
            for i in range(0, 2):
                xL = i * room_w
                yT = j * room_h
                xR = xL + room_w
                yB = yT + room_h

                # Right wall and door
                if i + 1 < 2:
                    self.grid.vert_wall(xR, yT, room_h)
                    pos = (xR, self._rand_int(yT + 1, yB))
                    self.grid.set(*pos, None)

                # Bottom wall and door
                if j + 1 < 2:
                    self.grid.horz_wall(xL, yB, room_w)
                    pos = (self._rand_int(xL + 1, xR), yB)
                    self.grid.set(*pos, None)

        # Randomize the player start position and orientation
        if self._agent_default_pos is not None:
            self.agent_pos = self._agent_default_pos
            # Clear the start cell in case a wall was drawn there.
            self.grid.set(*self._agent_default_pos, None)
            # assuming random start direction
            self.agent_dir = self._rand_int(0, 4)
        else:
            self.place_agent()

        if self._goal_default_pos is not None:
            goal = Goal()
            self.put_obj(goal, *self._goal_default_pos)
            # Both attributes hold the full (x, y) position. Unpacking the
            # pair across them would store x in init_pos and y in cur_pos.
            goal.init_pos = goal.cur_pos = tuple(self._goal_default_pos)
        else:
            self.place_obj(Goal())

    def step(self, action):
        """
        Advance the environment by one action.

        Returns:
            `(obs, reward, terminated, truncated, info)` where `obs` has had
            `_add_noise` applied and `info` is an empty dict.

        Raises:
            ValueError: if `action` is not one of `self.actions`.
        """
        self.step_count += 1

        reward = 0
        terminated = False
        truncated = False

        # Get the position in front of the agent
        fwd_pos = self.front_pos

        # Get the contents of the cell in front of the agent
        fwd_cell = self.grid.get(*fwd_pos)

        # Rotate left
        if action == self.actions.left:
            self.agent_dir -= 1
            if self.agent_dir < 0:
                self.agent_dir += 4

        # Rotate right
        elif action == self.actions.right:
            self.agent_dir = (self.agent_dir + 1) % 4

        # Move forward
        elif action == self.actions.forward:
            if fwd_cell is None or fwd_cell.can_overlap():
                self.agent_pos = tuple(fwd_pos)
            if fwd_cell is not None and fwd_cell.type == "goal":
                terminated = True
                reward = self._reward()
            if fwd_cell is not None and fwd_cell.type == "lava":
                terminated = True
            # If the agent stands on a noisy tile after a forward action
            # (even a blocked one), add the noisy reward. It is added rather
            # than assigned so a goal on a noisy tile keeps its success reward.
            # tuple() keeps the membership test valid for list/array positions.
            if tuple(self.agent_pos) in self._noisy_tile_pos:
                reward += self._reward_noise()

        # Pick up an object
        elif action == self.actions.pickup:
            if fwd_cell and fwd_cell.can_pickup():
                if self.carrying is None:
                    self.carrying = fwd_cell
                    self.carrying.cur_pos = np.array([-1, -1])
                    self.grid.set(fwd_pos[0], fwd_pos[1], None)

        # Drop an object
        elif action == self.actions.drop:
            if not fwd_cell and self.carrying:
                self.grid.set(fwd_pos[0], fwd_pos[1], self.carrying)
                self.carrying.cur_pos = fwd_pos
                self.carrying = None

        # Toggle/activate an object
        elif action == self.actions.toggle:
            if fwd_cell:
                fwd_cell.toggle(self, fwd_pos)

        # Done action (not used by default)
        elif action == self.actions.done:
            pass

        else:
            raise ValueError(f"Unknown action: {action}")

        if self.step_count >= self.max_steps:
            truncated = True

        if self.render_mode == "human":
            self.render()

        obs = self.gen_obs()
        obs = self._add_noise(obs)

        return obs, reward, terminated, truncated, {}