Spaces:

acozma
/

CS581-Algos-Demo

Sleeping

Andrei Cozma committed on Apr 24, 2023

Commit

eb7667c

1 Parent(s): 35d83a8

Updates

Files changed (1) hide show

MCAgent.py CHANGED Viewed

@@ -20,7 +20,7 @@ class MCAgent(AgentBase):
         self.Pi = np.full(
             (self.n_states, self.n_actions), self.epsilon / self.n_actions
         )
-        # The greedy action receives the remaining probability mass
         self.Pi[
             np.arange(self.n_states),
             np.random.randint(self.n_actions, size=self.n_states),

         self.Pi = np.full(
             (self.n_states, self.n_actions), self.epsilon / self.n_actions
         )
+        # For the initial policy, we randomly select a greedy action for each state
         self.Pi[
             np.arange(self.n_states),
             np.random.randint(self.n_actions, size=self.n_states),