lharri73 committed
Commit
5fc752e
·
1 Parent(s): d678220
Files changed (2)
  1. DPAgent.py +7 -34
  2. MCAgent.py +1 -0
DPAgent.py CHANGED
@@ -3,15 +3,14 @@ import numpy as np
 from gymnasium.envs.toy_text.frozen_lake import generate_random_map
 from matplotlib import pyplot as plt
 from tqdm import trange
+from Shared import Shared


-class DPAgent:
-    def __init__(self, env_name, gamma=0.9, theta=1e-10, **kwargs):
-        self.env = gym.make(env_name, **kwargs)
-        self.gamma = gamma
+class DPAgent(Shared):
+    def __init__(self, theta=1e-10, **kwargs):
+        super().__init__(**kwargs)
         self.theta = theta
         self.V = np.zeros(self.env.observation_space.n)
-        self.epsilon = 0
         self.Pi = None

     def policy(self, state):
@@ -39,7 +38,7 @@ class DPAgent:
             if delta < self.theta:
                 break
             i += 1
-            # self.test()
+            self.test()
             print(f"Iteration {i}: delta={delta}")
             # break

@@ -52,36 +51,10 @@ class DPAgent:
                    # if state == self.env.observation_space.n-1: reward = 1
                    expected_value += probability * (reward + self.gamma * self.V[next_state])
                self.Pi[s,a] = expected_value
+        self.Pi = np.argmax(self.Pi, axis=1)
+        print(self.Pi)
         # return self.V, self.Pi

-    def generate_episode(self, max_steps, render=False, **kwargs):
-        state, _ = self.env.reset()
-        episode_hist, solved, rgb_array = [], False, None
-
-        # Generate an episode following the current policy
-        for _ in range(max_steps):
-            rgb_array = self.env.render() if render else None
-            # Sample an action from the policy
-            action = self.policy(state)
-            maction = np.argmax(action)
-            # Take the action and observe the reward and next state
-            next_state, reward, done, truncated, _ = self.env.step(maction)
-            # Keeping track of the trajectory
-            episode_hist.append((state, maction, reward))
-            state = next_state
-
-            yield episode_hist, solved, rgb_array
-
-            # This is where the agent got to the goal.
-            # In the case in which the agent jumped off the cliff, it is simply respawned at the start position without termination.
-            if done or truncated:
-                solved = True
-                break
-
-        rgb_array = self.env.render() if render else None
-
-        yield episode_hist, solved, rgb_array
-

 if __name__ == "__main__":
     # env = gym.make('FrozenLake-v1', render_mode='human')
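
Note: the constructor state (environment handle, gamma) and the episode helpers deleted from DPAgent in this commit now come from the Shared base class, which is not part of this diff. The listing below is only a hedged sketch of the interface DPAgent appears to rely on after the change, reconstructed from the deleted lines; the real Shared.py may differ, and the body of test() in particular is not visible here.

# Hypothetical sketch of Shared.py, inferred from this diff only; names and
# defaults (env_name, gamma=0.9) are taken from the old DPAgent.__init__.
import gymnasium as gym
import numpy as np


class Shared:
    def __init__(self, env_name, gamma=0.9, **kwargs):
        # What DPAgent.__init__ set up itself before this commit.
        self.env = gym.make(env_name, **kwargs)
        self.gamma = gamma

    def policy(self, state):
        # Overridden by the concrete agents (DPAgent, MCAgent).
        raise NotImplementedError

    def generate_episode(self, max_steps, render=False, **kwargs):
        # Roughly the generator deleted from DPAgent above: roll out the
        # current policy, yielding the trajectory so far after every step.
        state, _ = self.env.reset()
        episode_hist, solved, rgb_array = [], False, None
        for _ in range(max_steps):
            rgb_array = self.env.render() if render else None
            action = np.argmax(self.policy(state))
            next_state, reward, done, truncated, _ = self.env.step(action)
            episode_hist.append((state, action, reward))
            state = next_state
            yield episode_hist, solved, rgb_array
            if done or truncated:
                solved = True
                break
        rgb_array = self.env.render() if render else None
        yield episode_hist, solved, rgb_array

    def test(self, **kwargs):
        # Called once per sweep by DPAgent after this commit; presumably runs a
        # greedy evaluation episode via generate_episode. Real body not shown.
        for episode_hist, solved, _ in self.generate_episode(max_steps=200):
            pass
        return solved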
MCAgent.py CHANGED
@@ -2,6 +2,7 @@ import numpy as np
 from tqdm import tqdm
 from Shared import Shared
 import wandb
+from Shared import Shared


 class MCAgent(Shared):
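
For context, a hedged usage sketch of DPAgent after this commit: the training entry point's real name is not visible in the diff, so train() below is a stand-in, and the environment id is only suggested by the commented-out FrozenLake line in __main__.

# Hypothetical usage; train() stands in for the sweep loop shown in the
# @@ -39,7 +38,7 @@ hunk (iterate until delta < theta, calling self.test() each sweep).
agent = DPAgent(env_name="FrozenLake-v1", theta=1e-10, gamma=0.9)
agent.train()
print(agent.V)   # state values
print(agent.Pi)  # after the np.argmax(..., axis=1) change, one greedy action per state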