Spaces:
Sleeping
Sleeping
import numpy as np | |
# Recorded episodes consumed by the value-estimation loop. Each episode is a
# list of steps in time order; each step is a [state, action, reward] triple.
episodes = [
    [
        ["A", "a1", 3],
        ["A", "a2", 2],
        ["B", "a1", -4],
        ["A", "a1", 4],
        ["B", "a1", -3],
    ],
    [
        ["B", "a1", -2],
        ["A", "a1", 3],
        ["B", "a2", -3],
    ],
]
# Lookup tables that translate state and action labels into integer indices.
index_map = {
    "states": {
        "A": 0,
        "B": 1,
    },
    "actions": {
        "a1": 0,
        "a2": 1,
    },
}
def main_r():
    """Estimate state values from ``episodes`` with a first-visit Monte Carlo pass.

    Every episode is walked backwards from its final step while the
    undiscounted return ``G`` is accumulated. When the current step is the
    earliest occurrence of its state within the episode, ``G`` is appended to
    that state's list of returns and the state's value entry is recomputed
    from the running mean of those returns. Progress is printed to stdout;
    nothing is returned.

    NOTE(review): the block nesting was reconstructed from a copy of the
    source whose indentation had been lost. The ``V[...]`` line is emitted
    only when a first-visit update happens -- confirm against the intended
    output.
    """
    print("# MonteCarloAgent.py")
    alpha = 0.1
    state_index = index_map["states"]
    # One value slot per known state instead of a hard-coded count; this
    # assumes the state indices are the contiguous range 0..n-1.
    v = np.zeros(len(state_index))
    rets = {s: [] for s in state_index}
    for ep in episodes:
        print("=" * 80)
        g = 0
        ep_len = len(ep)
        print(f"# Episode: {ep} (steps: {ep_len}) G: {g}")
        # Record the earliest time step of each state once per episode, so
        # the first-visit test below is a dictionary lookup rather than a
        # rescan of the episode prefix at every step.
        first_visit = {}
        for idx, step in enumerate(ep):
            first_visit.setdefault(step[0], idx)
        for t in range(ep_len - 1, -1, -1):
            s, a, r = ep[t]
            si = state_index[s]
            g = g + r
            print(f"# Step {t + 1}:")
            print(f"\ts: {s}, a: {a}, r: {r}")
            print(f"\tG: {g}")
            # Update only if s does not appear in the episode before time t.
            if first_visit[s] == t:
                rets[s].append(g)
                # NOTE(review): the mean return is scaled by alpha here.
                # Textbook first-visit MC uses the plain mean, and the
                # constant-alpha variant is v[si] + alpha * (g - v[si]).
                # Behaviour is kept as found -- confirm which one is intended.
                v[si] = alpha * (sum(rets[s]) / len(rets[s]))
                print(f"\tV[{s}] = {v[si]}")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main_r()