Andrei Cozma committed on
Commit
eb7667c
·
1 Parent(s): 35d83a8
Files changed (1) hide show
  1. MCAgent.py +1 -1
MCAgent.py CHANGED
@@ -20,7 +20,7 @@ class MCAgent(AgentBase):
20
  self.Pi = np.full(
21
  (self.n_states, self.n_actions), self.epsilon / self.n_actions
22
  )
23
- # The greedy action receives the remaining probability mass
24
  self.Pi[
25
  np.arange(self.n_states),
26
  np.random.randint(self.n_actions, size=self.n_states),
 
20
  self.Pi = np.full(
21
  (self.n_states, self.n_actions), self.epsilon / self.n_actions
22
  )
23
+ # For the initial policy, we randomly select a greedy action for each state
24
  self.Pi[
25
  np.arange(self.n_states),
26
  np.random.randint(self.n_actions, size=self.n_states),