# SarsaAgent.py — tabular SARSA demo on a toy 2-state, 2-action episode.
import numpy as np
from pip import main
class SarsaAgent:
    """
    Tabular agent that improves its behaviour with the SARSA update.
    """

    def __init__(self, epsilon, alpha, gamma, num_state, num_actions, action_space):
        """
        Constructor

        Args:
            epsilon: The degree of exploration
            alpha: The learning rate (step size of each Q-table update)
            gamma: The discount factor
            num_state: The number of states
            num_actions: The number of actions
            action_space: To call the random action
        """
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.num_state = num_state
        self.num_actions = num_actions
        # Q-table initialised to zeros: one row per state, one column per action.
        self.Q = np.zeros((self.num_state, self.num_actions))
        self.action_space = action_space

    def update(self, prev_state, next_state, reward, prev_action, next_action):
        """
        Update the action value function using the SARSA update:

            Q(S, A) <- Q(S, A) + alpha * (reward + gamma * Q(S', A') - Q(S, A))

        Args:
            prev_state: The previous state S
            next_state: The next state S'
            reward: The reward for taking the respective action
            prev_action: The previous action A
            next_action: The next action A'

        Returns:
            None
        """
        predict = self.Q[prev_state, prev_action]
        # TD target bootstraps on the Q-value of the *actual* next action
        # (on-policy), which is what distinguishes SARSA from Q-learning.
        target = reward + self.gamma * self.Q[next_state, next_action]
        self.Q[prev_state, prev_action] += self.alpha * (target - predict)
# One fixed toy trajectory as (state, action, reward) triples. The final
# reward is None: the episode terminates there and no reward follows.
episode = [
    ["s1", "a1", -8],
    ["s1", "a2", -16],
    ["s2", "a1", 20],
    ["s1", "a2", -10],
    ["s2", "a1", None],
]

# Maps state/action labels to row/column indices of the agent's Q-table.
index_map = {"s1": 0, "s2": 1, "a1": 0, "a2": 1}
def main_r():
    """Replay the fixed episode, applying one SARSA update per transition."""
    print("# SarsaAgent.py")
    agent = SarsaAgent(0.1, 0.5, 0.5, 2, 2, [0, 1])
    print(agent.Q)
    # Pair each step with its successor: SARSA needs (S, A, R, S', A').
    for step, ((s, a, r), (s_next, a_next, _)) in enumerate(
        zip(episode, episode[1:]), start=1
    ):
        print(f"# Step {step}")
        agent.update(
            index_map[s], index_map[s_next], r, index_map[a], index_map[a_next]
        )
        print(agent.Q)


if __name__ == "__main__":
    main_r()