# CS581-Algos-Demo/scripts/MonteCarloAgent.py
# Author: Andrei Cozma
# Commit: "Updates" (f902143)
import numpy as np
# Recorded sample episodes. Every episode is an ordered list of steps and
# every step is a three-item list: [state name, action name, reward].
episodes = [
    [
        ["A", "a1", 3],
        ["A", "a2", 2],
        ["B", "a1", -4],
        ["A", "a1", 4],
        ["B", "a1", -3],
    ],
    [
        ["B", "a1", -2],
        ["A", "a1", 3],
        ["B", "a2", -3],
    ],
]
# Lookup tables that translate symbolic state / action names into the
# zero-based integer positions used to index numeric arrays.
index_map = {
    "states": {name: pos for pos, name in enumerate(("A", "B"))},
    "actions": {name: pos for pos, name in enumerate(("a1", "a2"))},
}
def main_r(episode_data=None, idx_map=None, alpha=0.1):
    """Run first-visit Monte Carlo state-value estimation and print a trace.

    Each episode is walked backwards while accumulating the undiscounted
    return ``G``. When a step is the first occurrence of its state within
    the episode, ``G`` is appended to that state's list of returns and the
    state's value is set to ``alpha * mean(returns)``.

    Args:
        episode_data: Sequence of episodes, each a sequence of
            ``[state, action, reward]`` steps. Defaults to the module-level
            ``episodes``.
        idx_map: Mapping whose ``"states"`` entry maps state names to
            positions in the value table. Defaults to the module-level
            ``index_map``.
        alpha: Factor applied to the averaged return. Defaults to ``0.1``,
            the value that used to be hard-coded in the body.

    Returns:
        numpy.ndarray: The value table, indexed by state position.

    Raises:
        KeyError: If an episode mentions a state that is missing from
            ``idx_map["states"]``.
    """
    # Resolve defaults at call time so the function also works on data
    # supplied by the caller instead of only on the module-level demo data.
    if episode_data is None:
        episode_data = episodes
    if idx_map is None:
        idx_map = index_map
    state_index = idx_map["states"]

    print("# MonteCarloAgent.py")

    # Size the table from the mapping itself rather than a hard-coded 2, so
    # adding a state to the map cannot index past the end of the array.
    num_states = max(state_index.values(), default=-1) + 1
    v = np.zeros(num_states)
    rets = {s: [] for s in state_index}

    for ep in episode_data:
        print("=" * 80)
        g = 0
        ep_len = len(ep)
        print(f"# Episode: {ep} (steps: {ep_len}) G: {g}")

        # Earliest time step at which each state occurs in this episode.
        # Computed once per episode so the first-visit test below is a dict
        # lookup instead of rebuilding the episode prefix on every step.
        first_visit = {}
        for t, step in enumerate(ep):
            first_visit.setdefault(step[0], t)

        # Walk backwards so G at step t is the sum of rewards from t onward.
        for t in range(ep_len - 1, -1, -1):
            s, a, r = ep[t]
            si = state_index[s]
            g = g + r
            print(f"# Step {t + 1}:")
            print(f"\ts: {s}, a: {a}, r: {r}")
            print(f"\tG: {g}")
            # Update only when the state does not appear earlier than t.
            if first_visit[s] == t:
                rets[s].append(g)
                # NOTE(review): textbook first-visit MC uses the plain
                # average of the returns; scaling that average by alpha is
                # kept as-is to preserve the existing output. Pass alpha=1.0
                # for the unscaled average. The incremental alternative is
                # v[si] += alpha * (g - v[si]) -- confirm which is intended.
                v[si] = alpha * (sum(rets[s]) / len(rets[s]))
                print(f"\tV[{s}] = {v[si]}")

    return v
# Run the demo only when this file is executed directly as a script.
if __name__ == "__main__":
    main_r()