lharri73 committed on
Commit
17d4626
·
1 Parent(s): e282b5d

retrain frozenlake

Browse files
DPAgent.py CHANGED
@@ -16,8 +16,6 @@ class DPAgent(Shared):
16
  self.Pi = np.zeros(self.env.observation_space.n, self.env.action_space.n)
17
  if self.gamma >= 1.0:
18
  warnings.warn("DP will never converge with a gamma value =1.0. Try 0.99?", UserWarning)
19
- print(self.env)
20
- exit(1)
21
 
22
  def policy(self, state):
23
  return self.Pi[state]
@@ -68,7 +66,7 @@ class DPAgent(Shared):
68
 
69
  if __name__ == "__main__":
70
  # env = gym.make('FrozenLake-v1', render_mode='human')
71
- dp = DPAgent(env_name="FrozenLake-v1", gamma=0.99)
72
  dp.train()
73
  dp.save_policy('dp_policy.npy')
74
  env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False, desc=[
 
16
  self.Pi = np.zeros(self.env.observation_space.n, self.env.action_space.n)
17
  if self.gamma >= 1.0:
18
  warnings.warn("DP will never converge with a gamma value =1.0. Try 0.99?", UserWarning)
 
 
19
 
20
  def policy(self, state):
21
  return self.Pi[state]
 
66
 
67
  if __name__ == "__main__":
68
  # env = gym.make('FrozenLake-v1', render_mode='human')
69
+ dp = DPAgent(env="FrozenLake-v1", gamma=0.99)
70
  dp.train()
71
  dp.save_policy('dp_policy.npy')
72
  env = gym.make('FrozenLake-v1', render_mode='human', is_slippery=False, desc=[
MCAgent.py CHANGED
@@ -9,13 +9,13 @@ class MCAgent(Shared):
9
  def __init__(
10
  self,
11
  /,
12
- env_name="CliffWalking-v0",
13
  gamma=0.99,
14
  epsilon=0.1,
15
  run_name=None,
16
  **kwargs,
17
  ):
18
- super().__init__(env_name, gamma, epsilon, run_name, **kwargs)
19
  self.reset()
20
 
21
  def reset(self):
 
9
  def __init__(
10
  self,
11
  /,
12
+ env="CliffWalking-v0",
13
  gamma=0.99,
14
  epsilon=0.1,
15
  run_name=None,
16
  **kwargs,
17
  ):
18
+ super().__init__(env, gamma, epsilon, run_name, **kwargs)
19
  self.reset()
20
 
21
  def reset(self):
Shared.py CHANGED
@@ -6,19 +6,19 @@ class Shared:
6
 
7
  def __init__(
8
  self,/,
9
- env_name="CliffWalking-v0",
10
  gamma=0.99,
11
  epsilon=0.1,
12
  run_name=None,
13
  **kwargs,
14
  ):
15
  print("=" * 80)
16
- print(f"# Init Agent - {env_name}")
17
  print(f"- epsilon: {epsilon}")
18
  print(f"- gamma: {gamma}")
19
  print(f"- run_name: {run_name}")
20
  self.run_name = run_name
21
- self.env_name = env_name
22
  self.epsilon, self.gamma = epsilon, gamma
23
 
24
  self.env_kwargs = {k:v for k,v in kwargs.items() if k in ['render_mode']}
 
6
 
7
  def __init__(
8
  self,/,
9
+ env="CliffWalking-v0",
10
  gamma=0.99,
11
  epsilon=0.1,
12
  run_name=None,
13
  **kwargs,
14
  ):
15
  print("=" * 80)
16
+ print(f"# Init Agent - {env}")
17
  print(f"- epsilon: {epsilon}")
18
  print(f"- gamma: {gamma}")
19
  print(f"- run_name: {run_name}")
20
  self.run_name = run_name
21
+ self.env_name = env
22
  self.epsilon, self.gamma = epsilon, gamma
23
 
24
  self.env_kwargs = {k:v for k,v in kwargs.items() if k in ['render_mode']}
policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy CHANGED
Binary files a/policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy and b/policies/DPAgent_FrozenLake-v1_e2500_s200_g0.99_e0.4_first_visit.npy differ