Spaces:
Sleeping
Sleeping
Andrei Cozma
committed on
Commit
·
ec9cd4e
1
Parent(s):
fba39bb
Updates
Browse files- AgentBase.py +10 -4
AgentBase.py
CHANGED
@@ -63,17 +63,23 @@ class AgentBase:
|
|
63 |
print(f"- n_actions: {self.n_actions}")
|
64 |
|
65 |
def choose_action(self, state, greedy=False, **kwargs):
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
greedy_action = np.argmax(self.Pi[state])
|
70 |
-
|
71 |
if greedy or self.epsilon_override == 0.0:
|
72 |
return greedy_action
|
73 |
|
|
|
74 |
if self.epsilon_override is None:
|
75 |
return np.random.choice(self.n_actions, p=self.Pi[state])
|
76 |
|
|
|
77 |
return np.random.choice(
|
78 |
[greedy_action, np.random.randint(self.n_actions)],
|
79 |
p=[1.0 - self.epsilon_override, self.epsilon_override],
|
|
|
63 |
print(f"- n_actions: {self.n_actions}")
|
64 |
|
65 |
def choose_action(self, state, greedy=False, **kwargs):
|
66 |
+
"""
|
67 |
+
Sample an action from the policy.
|
68 |
+
Also allows the ability to override the epsilon value (for the purpose of the demo)
|
69 |
+
:param state: The current state
|
70 |
+
:param greedy: If True, always return the greedy action (argmax of the policy at the current state)
|
71 |
+
:return: The sampled action
|
72 |
+
"""
|
73 |
+
# If greedy is True, always return the greedy action
|
74 |
greedy_action = np.argmax(self.Pi[state])
|
|
|
75 |
if greedy or self.epsilon_override == 0.0:
|
76 |
return greedy_action
|
77 |
|
78 |
+
# Otherwise, sample an action from the soft policy (epsilon-greedy)
|
79 |
if self.epsilon_override is None:
|
80 |
return np.random.choice(self.n_actions, p=self.Pi[state])
|
81 |
|
82 |
+
# If we ever want to manually override the epsilon value, it happens here
|
83 |
return np.random.choice(
|
84 |
[greedy_action, np.random.randint(self.n_actions)],
|
85 |
p=[1.0 - self.epsilon_override, self.epsilon_override],
|