Andrei Cozma committed on
Commit
ec9cd4e
·
1 Parent(s): fba39bb
Files changed (1) hide show
  1. AgentBase.py +10 -4
AgentBase.py CHANGED
@@ -63,17 +63,23 @@ class AgentBase:
63
  print(f"- n_actions: {self.n_actions}")
64
 
65
  def choose_action(self, state, greedy=False, **kwargs):
66
- # Sample an action from the policy.
67
- # The epsilon_override argument allows forcing the use of a new epsilon value than the one previously used during training.
68
- # The ability to override was mostly added for testing purposes and for the demo.
 
 
 
 
 
69
  greedy_action = np.argmax(self.Pi[state])
70
-
71
  if greedy or self.epsilon_override == 0.0:
72
  return greedy_action
73
 
 
74
  if self.epsilon_override is None:
75
  return np.random.choice(self.n_actions, p=self.Pi[state])
76
 
 
77
  return np.random.choice(
78
  [greedy_action, np.random.randint(self.n_actions)],
79
  p=[1.0 - self.epsilon_override, self.epsilon_override],
 
63
  print(f"- n_actions: {self.n_actions}")
64
 
65
  def choose_action(self, state, greedy=False, **kwargs):
66
+ """
67
+ Sample an action from the policy.
68
+ Also allows overriding the epsilon value (for the purpose of the demo)
69
+ :param state: The current state
70
+ :param greedy: If True, always return the greedy action (argmax of the policy at the current state)
71
+ :return: The sampled action
72
+ """
73
+ # If greedy is True, always return the greedy action
74
  greedy_action = np.argmax(self.Pi[state])
 
75
  if greedy or self.epsilon_override == 0.0:
76
  return greedy_action
77
 
78
+ # Otherwise, sample an action from the soft policy (epsilon-greedy)
79
  if self.epsilon_override is None:
80
  return np.random.choice(self.n_actions, p=self.Pi[state])
81
 
82
+ # If we ever want to manually override the epsilon value, it happens here
83
  return np.random.choice(
84
  [greedy_action, np.random.randint(self.n_actions)],
85
  p=[1.0 - self.epsilon_override, self.epsilon_override],