Spaces:
Runtime error
Runtime error
Commit
·
fe7b65d
1
Parent(s):
f7d4fd5
Update objects_9x9.py
Browse files- objects_9x9.py +5 -5
objects_9x9.py
CHANGED
@@ -76,13 +76,13 @@ class State:
|
|
76 |
# backpropagate reward
|
77 |
if result == 1:
|
78 |
self.p1.feedReward(1)
|
79 |
-
self.p2.feedReward(
|
80 |
elif result == -1:
|
81 |
-
self.p1.feedReward(
|
82 |
self.p2.feedReward(1)
|
83 |
else:
|
84 |
self.p1.feedReward(0.1)
|
85 |
-
self.p2.feedReward(0.
|
86 |
|
87 |
# board reset
|
88 |
def reset(self):
|
@@ -189,7 +189,7 @@ class Player:
|
|
189 |
def __init__(self, name, exp_rate=0.3):
|
190 |
self.name = name
|
191 |
self.states = [] # record all positions taken
|
192 |
-
self.lr = 0.
|
193 |
self.exp_rate = exp_rate
|
194 |
self.decay_gamma = 0.9
|
195 |
self.states_value = {} # state -> value
|
@@ -270,7 +270,7 @@ if __name__ == "__main__":
|
|
270 |
|
271 |
st = State(p1, p2)
|
272 |
print("training...")
|
273 |
-
st.playwithbot(
|
274 |
|
275 |
p1.savePolicy()
|
276 |
p2.savePolicy()
|
|
|
76 |
# backpropagate reward
|
77 |
if result == 1:
|
78 |
self.p1.feedReward(1)
|
79 |
+
self.p2.feedReward(-1)
|
80 |
elif result == -1:
|
81 |
+
self.p1.feedReward(-1)
|
82 |
self.p2.feedReward(1)
|
83 |
else:
|
84 |
self.p1.feedReward(0.1)
|
85 |
+
self.p2.feedReward(0.1)
|
86 |
|
87 |
# board reset
|
88 |
def reset(self):
|
|
|
189 |
def __init__(self, name, exp_rate=0.3):
|
190 |
self.name = name
|
191 |
self.states = [] # record all positions taken
|
192 |
+
self.lr = 0.1
|
193 |
self.exp_rate = exp_rate
|
194 |
self.decay_gamma = 0.9
|
195 |
self.states_value = {} # state -> value
|
|
|
270 |
|
271 |
st = State(p1, p2)
|
272 |
print("training...")
|
273 |
+
st.playwithbot(500000)
|
274 |
|
275 |
p1.savePolicy()
|
276 |
p2.savePolicy()
|