Add application file
Browse files
it works
player settings
fix bug
refine the settings and add help doc
fix param
- .gitattributes +1 -0
- Arena.py +102 -0
- Game.py +113 -0
- MCTS.py +256 -0
- NeuralNet.py +50 -0
- app.py +244 -0
- models/firstBlood.pth.tar +3 -0
- models/random.pth.tar +3 -0
- models/super.pth.tar +3 -0
- no_one/NoOneGame.py +206 -0
- no_one/NoOneLogic.py +107 -0
- no_one/NoOnePlayes.py +42 -0
- no_one/__init__.py +0 -0
- no_one/pytorch/NNet.py +120 -0
- no_one/pytorch/NoOneNNet.py +54 -0
- no_one/pytorch/__init__.py +0 -0
- requirements.txt +46 -0
- utils.py +22 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pth.tar filter=lfs diff=lfs merge=lfs -text
|
Arena.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
from tqdm import tqdm
|
4 |
+
|
5 |
+
log = logging.getLogger(__name__)
|
6 |
+
|
7 |
+
|
8 |
+
class Arena():
    """
    An Arena class where any 2 agents can be pit against each other.
    """

    # playGame abandons a game once more than this many turns have been played...
    MAX_TURNS = 100
    # ...and reports this value, which is neither a win (1), a loss (-1) nor
    # "still running" (0), so playGames counts it as a draw.
    DRAW_RESULT = 0.01

    def __init__(self, player1, player2, game, display=None):
        """
        Input:
            player 1,2: two functions that takes board as input, return action
            game: Game object
            display: a function that takes board as input and prints it.
                     Is necessary for verbose mode.
        """
        self.player1 = player1
        self.player2 = player2
        self.game = game
        self.display = display

    def playGame(self, verbose=False):
        """
        Executes one episode of a game. player1 always moves first.

        Returns:
            either
                winner: player who won the game (1 if player1, -1 if player2)
            or
                draw result returned from the game that is neither 1, -1, nor 0.
            or
                DRAW_RESULT when the game is still running after MAX_TURNS
                turns.
        """
        # Indexed by curPlayer + 1, so -1 -> player2 and +1 -> player1.
        players = [self.player2, None, self.player1]
        curPlayer = 1
        board = self.game.getInitBoard()
        it = 0
        while self.game.getGameEnded(board, curPlayer) == 0:
            if it > self.MAX_TURNS:
                return self.DRAW_RESULT
            it += 1
            if verbose:
                assert self.display
                print("Turn ", str(it), "Player ", str(curPlayer))
                self.display(board)
            action = players[curPlayer + 1](self.game.getCanonicalForm(board, curPlayer))

            # Validity is checked on the canonical board, i.e. from the point
            # of view of "player 1".
            valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1)

            if valids[action] == 0:
                log.error(f'Action {action} is not valid!')
                log.debug(f'valids = {valids}')
                assert valids[action] > 0
            board, curPlayer = self.game.getNextState(board, curPlayer, action)
        if verbose:
            assert self.display
            print("Game over: Turn ", str(it), "Result ", str(self.game.getGameEnded(board, 1)))
            self.display(board)
        # getGameEnded is relative to curPlayer; multiplying by curPlayer turns
        # it into a result relative to player1.
        return curPlayer * self.game.getGameEnded(board, curPlayer)

    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        The two players are swapped for the second half and swapped back
        afterwards, so the arena can be reused for further calls.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0

        for _ in tqdm(range(num), desc="Arena.playGames (1)"):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1

        self.player1, self.player2 = self.player2, self.player1
        try:
            for _ in tqdm(range(num), desc="Arena.playGames (2)"):
                gameResult = self.playGame(verbose=verbose)
                # Seats are swapped, so a result of -1 is a win for the
                # original player1.
                if gameResult == -1:
                    oneWon += 1
                elif gameResult == 1:
                    twoWon += 1
                else:
                    draws += 1
        finally:
            # Restore the original seating even if a game raised.
            self.player1, self.player2 = self.player2, self.player1

        return oneWon, twoWon, draws
|
Game.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class Game():
    """
    This class specifies the base Game class. To define your own game, subclass
    this class and implement the functions below. This works when the game is
    two-player, adversarial and turn-based.

    Use 1 for player1 and -1 for player2.

    Every method here is a stub that returns None; subclasses are expected to
    override them.
    """
    def __init__(self):
        pass

    def getInitBoard(self):
        """
        Returns:
            startBoard: a representation of the board (ideally this is the form
                        that will be the input to your neural network)
        """
        pass

    def getBoardSize(self):
        """
        Returns:
            (x,y): a tuple of board dimensions
        """
        pass

    def getActionSize(self):
        """
        Returns:
            actionSize: number of all possible actions
        """
        pass

    def getNextState(self, board, player, action):
        """
        Input:
            board: current board
            player: current player (1 or -1)
            action: action taken by current player

        Returns:
            nextBoard: board after applying action
            nextPlayer: player who plays in the next turn (should be -player)
        """
        pass

    def getValidMoves(self, board, player):
        """
        Input:
            board: current board
            player: current player

        Returns:
            validMoves: a binary vector of length self.getActionSize(), 1 for
                        moves that are valid from the current board and player,
                        0 for invalid moves
        """
        pass

    def getGameEnded(self, board, player):
        """
        Input:
            board: current board
            player: current player (1 or -1)

        Returns:
            r: 0 if game has not ended. 1 if player won, -1 if player lost,
               small non-zero value for draw.
        """
        pass

    def getCanonicalForm(self, board, player):
        """
        Input:
            board: current board
            player: current player (1 or -1)

        Returns:
            canonicalBoard: returns canonical form of board. The canonical form
                            should be independent of player. For e.g. in chess,
                            the canonical form can be chosen to be from the pov
                            of white. When the player is white, we can return
                            board as is. When the player is black, we can invert
                            the colors and return the board.
        """
        pass

    def getSymmetries(self, board, pi):
        """
        Input:
            board: current board
            pi: policy vector of size self.getActionSize()

        Returns:
            symmForms: a list of [(board,pi)] where each tuple is a symmetrical
                       form of the board and the corresponding pi vector. This
                       is used when training the neural network from examples.
        """
        pass

    def stringRepresentation(self, board):
        """
        Input:
            board: current board

        Returns:
            boardString: a quick conversion of board to a string format.
                         Required by MCTS for hashing.
        """
        pass

    def reset_steps(self):
        """
        Hook that MCTS.getActionProb calls before every simulation.

        Games that keep a per-simulation step counter override this to reset
        it; the default does nothing so that games without such a counter
        still work with MCTS.
        """
        pass
|
MCTS.py
ADDED
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import math
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
|
7 |
+
# from no_one.NoOnePlayes import HumanPlayer
|
8 |
+
|
9 |
+
EPS = 1e-8
|
10 |
+
|
11 |
+
log = logging.getLogger(__name__)
|
12 |
+
|
13 |
+
|
14 |
+
class MCTS():
    """
    This class handles the MCTS tree.

    All statistics are keyed by s = game.stringRepresentation(board), or by
    the pair (s, a) for per-edge values.
    """

    def __init__(self, game, nnet, args):
        self.game = game
        self.nnet = nnet
        self.args = args
        self.Qsa = {}  # stores Q values for s,a (as defined in the paper)
        self.Nsa = {}  # stores #times edge s,a was visited
        self.Ns = {}  # stores #times board s was visited
        self.Ps = {}  # stores initial policy (returned by neural net)

        self.Es = {}  # stores game.getGameEnded ended for board s
        self.Vs = {}  # stores game.getValidMoves for board s

    def getActionProb(self, canonicalBoard, temp=1):
        """
        This function performs numMCTSSims simulations of MCTS starting from
        canonicalBoard.

        If no edge of the root was visited (for example with a single
        simulation, which only expands the root), the valid-move mask is used
        in place of the visit counts, so the result is uniform over the valid
        moves instead of a division by zero.

        Returns:
            probs: a policy vector where the probability of the ith action is
                   proportional to Nsa[(s,a)]**(1./temp). With temp == 0 the
                   vector is one-hot on a most-visited action (ties broken at
                   random).
        """
        for _ in range(self.args.numMCTSSims):
            self.game.reset_steps()
            self.search(canonicalBoard)

        s = self.game.stringRepresentation(canonicalBoard)
        counts = [self.Nsa.get((s, a), 0) for a in range(self.game.getActionSize())]

        if sum(counts) == 0:
            # Nothing was visited below the root: fall back on the valid moves
            # so that neither branch below can select an invalid action.
            valids = self.game.getValidMoves(canonicalBoard, 1)
            counts = [1 if v else 0 for v in valids]
            if sum(counts) == 0:
                # No valid move either; keep the vector well-formed.
                counts = [1] * len(counts)

        if temp == 0:
            bestAs = np.flatnonzero(np.asarray(counts) == max(counts))
            bestA = np.random.choice(bestAs)
            probs = [0] * len(counts)
            probs[bestA] = 1
            return probs

        counts = [x ** (1. / temp) for x in counts]
        counts_sum = float(sum(counts))
        return [x / counts_sum for x in counts]

    def search_iterate(self, canonicalBoard):
        """
        Performs one simulation with an explicit stack instead of recursion.

        Unlike search, this variant has no depth-based random-move fallback.

        Stack frames are (tag, payload):
            (0, (board,))  visit the board: evaluate it or descend one level
            (1, (s, a))    back the pending value up through edge (s, a)

        Returns:
            v: the negative of the value of canonicalBoard
        """
        stack = [(0, (canonicalBoard,))]
        results = []  # holds the value currently being propagated upwards

        while stack:
            st, sv = stack.pop()
            if st == 0:
                result, ns = self.search_iterate_st0(sv[0])
                if result is not None:
                    results.append(result)
                if ns is not None:
                    # Push the backup frame first so that it runs after the
                    # child board has been visited.
                    stack.append((1, (ns[1], ns[2])))
                    stack.append((0, (ns[0],)))
            elif st == 1:
                v = results.pop()
                v = self.search_iterate_update(v, sv[0], sv[1])
                results.append(v)
            else:
                raise ValueError("Invalid state")
        return results.pop()

    def search_iterate_st0(self, canonicalBoard):
        """
        Visits one board for search_iterate.

        Returns:
            (value, None) when the board is terminal or a newly expanded leaf;
            value is already negated for the parent.
            (None, (next_board, s, action)) when the search has to descend
            through edge (s, action).
        """
        s = self.game.stringRepresentation(canonicalBoard)

        if s not in self.Es:
            self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
        if self.Es[s] != 0:
            return -self.Es[s], None
        if s not in self.Ps:
            return self._expand_leaf(canonicalBoard, s), None

        best_act = self._best_action(s)
        next_s, next_player = self.game.getNextState(canonicalBoard, 1, best_act)
        next_s = self.game.getCanonicalForm(next_s, next_player)

        return None, (next_s, s, best_act)

    def search_iterate_update(self, v, s, a):
        """
        Backs value v up through edge (s, a): updates the running mean Qsa and
        the visit counters Nsa and Ns.

        Returns:
            -v, the value as seen from the parent of s.
        """
        if (s, a) in self.Qsa:
            self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
            self.Nsa[(s, a)] += 1
        else:
            self.Qsa[(s, a)] = v
            self.Nsa[(s, a)] = 1

        self.Ns[s] += 1
        return -v

    def search(self, canonicalBoard, depth=0):
        """
        This function performs one iteration of MCTS. It is recursively called
        till a leaf node is found. The action chosen at each node is one that
        has the maximum upper confidence bound as in the paper.

        Once a leaf node is found, the neural network is called to return an
        initial policy P and a value v for the state. This value is propagated
        up the search path. In case the leaf node is a terminal state, the
        outcome is propagated up the search path. The values of Ns, Nsa, Qsa are
        updated.

        NOTE: the return values are the negative of the value of the current
        state. This is done since v is in [-1,1] and if v is the value of a
        state for the current player, then its value is -v for the other player.

        Returns:
            v: the negative of the value of the current canonicalBoard
        """
        s = self.game.stringRepresentation(canonicalBoard)

        if s not in self.Es:
            self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
        if self.Es[s] != 0:
            # terminal node
            return -self.Es[s]

        if s not in self.Ps:
            # leaf node
            return self._expand_leaf(canonicalBoard, s)

        a = self._best_action(s)

        if depth > 100:
            # Past 100 plies the UCB choice is replaced by a random valid move
            # and the counter is wound back to 80, so the override recurs only
            # after another 20 levels (presumably to break out of cycling
            # lines -- confirm).
            candidates = self.game.getValidMoves(canonicalBoard, 1)
            a = np.random.choice([i for i in range(len(candidates)) if candidates[i] == 1])
            depth = 80

        next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
        next_s = self.game.getCanonicalForm(next_s, next_player)

        v = self.search(next_s, depth=depth + 1)

        return self.search_iterate_update(v, s, a)

    def _expand_leaf(self, canonicalBoard, s):
        """Queries the network for board s, stores its masked policy and returns -v."""
        self.Ps[s], v = self.nnet.predict(canonicalBoard)
        valids = self.game.getValidMoves(canonicalBoard, 1)
        self.Ps[s] = self.Ps[s] * valids  # masking invalid moves
        sum_Ps_s = np.sum(self.Ps[s])
        if sum_Ps_s > 0:
            self.Ps[s] /= sum_Ps_s  # renormalize
        else:
            # if all valid moves were masked make all valid moves equally probable

            # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've get overfitting or something else.
            # If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
            log.error("All valid moves were masked, doing a workaround.")
            self.Ps[s] = self.Ps[s] + valids
            self.Ps[s] /= np.sum(self.Ps[s])

        self.Vs[s] = valids
        self.Ns[s] = 0
        return -v

    def _best_action(self, s):
        """Returns the valid action of s with the highest upper confidence bound (-1 if none)."""
        valids = self.Vs[s]
        cur_best = -float('inf')
        best_act = -1

        for a in range(self.game.getActionSize()):
            if valids[a]:
                if (s, a) in self.Qsa:
                    u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
                            1 + self.Nsa[(s, a)])
                else:
                    # Unvisited edge: Q is taken as 0; EPS keeps the prior
                    # relevant while Ns[s] is still 0.
                    u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)

                if u > cur_best:
                    cur_best = u
                    best_act = a

        return best_act
|
NeuralNet.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class NeuralNet:
    """
    Interface every network wrapper has to provide.

    Subclass it and override the methods below. The network never sees which
    player is to move; it is only ever given boards in canonical form. All
    methods of this base class are stubs that return None.
    """

    def __init__(self, game):
        """Accepts the game the network is built for; the base class ignores it."""

    def train(self, examples):
        """
        Fit the network on self-play data.

        Input:
            examples: list of (board, pi, v) tuples. board is in canonical
                      form, pi is the MCTS-informed policy vector for that
                      board and v is the board's value.
        """

    def predict(self, board):
        """
        Evaluate a single board.

        Input:
            board: current board in its canonical form.

        Returns:
            pi: policy vector for the board, a numpy array of length
                game.getActionSize
            v: a float in [-1,1] giving the value of the board
        """

    def save_checkpoint(self, folder, filename):
        """Write the network together with its parameters to folder/filename."""

    def load_checkpoint(self, folder, filename):
        """Read the network parameters back from folder/filename."""
|
app.py
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
import Arena
|
5 |
+
|
6 |
+
from MCTS import MCTS
|
7 |
+
|
8 |
+
from no_one.NoOneGame import NoOneGame
|
9 |
+
|
10 |
+
|
11 |
+
from no_one.pytorch.NNet import NNetWrapper as NNet
|
12 |
+
from utils import *
|
13 |
+
|
14 |
+
import time
|
15 |
+
|
16 |
+
|
17 |
+
# Glyph shown on a board button for each cell value.
square_content = {-1: "❌", 0: "·", 1: "⭕"}

# Display name of each seat, in player_settings order.
players = [{"name": glyph} for glyph in ("❌", "⭕")]

# Choices of the "who plays" radio; index 0 is the AI, index 1 the human.
player_types = [":rainbow[AI]", "Human"]

# Checkpoint file (looked up under ./models/ by ai_player) for each AI rank.
ai_models = {
    "Super": "super.pth.tar",
    "First Blood": "firstBlood.pth.tar",
    "Random": "random.pth.tar",
}

# Rank labels in the order they are offered in the settings panel.
ai_ranks = list(ai_models)
|
31 |
+
|
32 |
+
# Single game-rules object shared by every function in this script; the
# argument is the board edge length n (NoOneGame reports an n x n board).
game = NoOneGame(4)
|
33 |
+
|
34 |
+
def check_clicked(i, j):
    """Tell whether cell (i, j) is the currently selected cell."""
    selected = st.session_state.clicked
    return True if selected == (i, j) else False
|
38 |
+
|
39 |
+
class HumanPlayer:
    """Turns a pair of clicked cells into an action index of the game."""

    # (src - target) offset -> direction code handed to game.encodeAction.
    _DIRECTION_BY_OFFSET = {
        (-1, 0): 0,
        (0, -1): 1,
        (1, 0): 2,
        (0, 1): 3,
    }

    def __init__(self, game):
        self.game = game

    def encode(self, src, target):
        """
        Encode the step from cell src to cell target.

        Returns the action index, or None when target is not exactly one cell
        away from src along a row or a column.
        """
        offset = (src[0] - target[0], src[1] - target[1])
        direction = self._DIRECTION_BY_OFFSET.get(offset)
        if direction is None:
            return None
        return self.game.encodeAction(src, direction)

    def play(self, board, player, src, target):
        """
        Return the action for moving src -> target, or None when the step
        cannot be encoded or the game does not list it as valid for player.
        """
        action = self.encode(src, target)
        if action is None:
            return None
        if self.game.getValidMoves(board, player)[action] != 1:
            return None
        return action
|
63 |
+
|
64 |
+
|
65 |
+
def ai_player(ai_model):
    """
    Build an AI move chooser for the given rank.

    Loads the rank's checkpoint from ./models/ into a fresh network and wraps
    it in an MCTS that runs 50 simulations per move. The returned callable
    takes a canonical board and returns the index of the chosen action; the
    search tree is kept between calls.
    """
    net = NNet(game)
    net.load_checkpoint('./models/', ai_models[ai_model])
    search_args = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    tree = MCTS(game, net, search_args)

    def choose(canonical_board):
        # temp=0 yields a one-hot policy, so argmax is the selected action.
        policy = tree.getActionProb(canonical_board, temp=0)
        return np.argmax(policy)

    return choose
|
71 |
+
|
72 |
+
|
73 |
+
def human_step(i, j):
    """
    Click handler of board cell (i, j).

    Clicks are ignored once the game has a result, and while the side to move
    is not played by a human. Otherwise:
      * nothing selected: select the cell if it holds a piece of the side to
        move, else report an invalid move;
      * the selected cell is clicked again: cancel the selection;
      * another own piece is clicked: move the selection to it;
      * any other cell: try to move the selected piece there.
    """
    state = st.session_state

    # A finished game must not accept further moves; move() would otherwise
    # score the result a second time.
    if state.winner is not None:
        return

    player = state.players[state.player + 1]
    # AI seats hold plain callables without a play() method. Duck typing is
    # used instead of isinstance because the script, and with it the
    # HumanPlayer class object, is re-executed on every Streamlit rerun.
    if not hasattr(player, "play"):
        return

    if state.clicked is None:
        if state.board[i, j] != state.player:
            st.toast('Invalid move!')
            return
        state.clicked = (i, j)
        return

    if state.clicked == (i, j):
        # selection canceled
        state.clicked = None
        return

    if state.board[i, j] == state.player:
        state.clicked = (i, j)
        return

    # move
    action = player.play(state.board, state.player, state.clicked, (i, j))
    if action is None:
        st.toast('Invalid move!')
        return
    move(action)
    state.clicked = None
|
96 |
+
|
97 |
+
|
98 |
+
def ai_step():
    """Let the AI seated at the side to move pick an action and play it."""
    side = st.session_state.player
    chooser = st.session_state.players[side + 1]
    # The chooser expects the board from the mover's point of view.
    canonical = game.getCanonicalForm(st.session_state.board, st.session_state.player)
    move(chooser(canonical))
|
105 |
+
|
106 |
+
|
107 |
+
def move(action):
    """
    Apply action for the side to move and record the result if the game ended.

    Updates the board and the side to move in the session state. When the
    game reports a result, `winner` is set to that result from the point of
    view of the absolute players (-1 / 1). Only a decisive result is added to
    the score board; any other non-zero value (the Game interface documents a
    small non-zero value for draws) just ends the game.
    """
    p = st.session_state.player
    st.session_state.board, st.session_state.player = game.getNextState(
        st.session_state.board, p, action,
    )

    # Result relative to the player who just moved; 0 means still running.
    res = game.getGameEnded(st.session_state.board, p)
    if res == 0:
        return

    outcome = res * p
    st.session_state.winner = outcome
    # Guard the lookup: only -1 and 1 are keys of the score board.
    if outcome in st.session_state.win:
        st.session_state.win[outcome] += 1
        st.balloons()
|
118 |
+
|
119 |
+
def reinit():
    """Flag that the game has to be rebuilt; main() acts on the flag during the same rerun."""
    st.session_state["reinit"] = True
|
121 |
+
|
122 |
+
def init(post_init=False):
    """
    Start a fresh game in the session state.

    The score board is reset unless post_init is true. The board is set to
    the initial position, the ❌ side (-1) moves first, and the two seats are
    rebuilt from player_settings.
    """
    state = st.session_state
    state.reinit = False
    if not post_init:
        state.win = {-1: 0, 1: 0}

    state.board = game.getInitBoard()
    state.player = -1

    seats = []
    for setting in state.player_settings:
        if setting["pt"] == player_types[1]:
            seats.append(HumanPlayer(game))
        else:
            seats.append(ai_player(setting["ai_model"]))
    # A placeholder in the middle makes the list addressable with
    # player + 1, i.e. -1 -> index 0 and 1 -> index 2.
    seats.insert(1, None)
    state.players = seats

    state.winner = None
    state.clicked = None
|
139 |
+
|
140 |
+
|
141 |
+
def player_to_index(p):
    """Map player id -1 / 1 to its player_settings index 0 / 1; anything else gives None."""
    if p == -1:
        return 0
    return 1 if p == 1 else None
|
146 |
+
|
147 |
+
|
148 |
+
def main():
    """
    Render the whole page; Streamlit re-runs this on every interaction.

    Order of work: create default settings, draw the "New Game" button and the
    settings panel, (re)initialise the game when needed, optionally show the
    rules in the sidebar, draw both player headers, let the AI move if it is
    its turn, and finally draw the board and the score.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation had been stripped -- in particular the placement of the
    divider / "Show me how to play" checkbox inside the settings expander is
    an assumption to confirm.
    """
    # Defaults: ❌ is played by a human, ⭕ by the AI.
    if "player_settings" not in st.session_state:
        st.session_state.player_settings = [
            {"pt": player_types[1], "ai_model": ai_ranks[0]},
            {"pt": player_types[0], "ai_model": ai_ranks[1]},
        ]
    if "reinit" not in st.session_state:
        st.session_state.reinit = False

    fire, settings = st.columns([1, 2])
    # init(True) restarts the game but keeps the score board.
    fire.button('New Game', on_click=init, args=(True,))
    with settings.expander(
        'Settings',
        expanded=False,
    ):
        st.warning('Any setting changing will restart the game immediately', icon="⚠️")
        # One settings column per seat; any change raises the reinit flag.
        for i, p in enumerate(st.columns([0.5, 0.5])):
            with p:
                # players[i]
                st.session_state.player_settings[i]["pt"] = st.radio(
                    f"Who will play %s" % players[i]["name"],
                    player_types,
                    key=f"xp_type_{i}",
                    horizontal=True,
                    index=player_types.index(st.session_state.player_settings[i]["pt"]),
                    on_change=reinit,
                )
                # The rank selector is only offered for AI seats.
                if st.session_state.player_settings[i]["pt"] == player_types[0]:
                    st.session_state.player_settings[i]["ai_model"] = st.radio(
                        "AI rank",
                        ai_ranks,
                        key=f"xp_rank_{i}",
                        index=ai_ranks.index(st.session_state.player_settings[i]["ai_model"]),
                        on_change=reinit,
                    )

        st.divider()
        st.checkbox("Show me how to play", key="show_how_to_play")


    # First run of the session, or a setting was changed: full reset
    # (including the score board).
    if "board" not in st.session_state or st.session_state.reinit:
        init()

    if st.session_state.show_how_to_play:
        with st.sidebar:
            st.title("How to play")
            # NOTE(review): the rule text is reproduced as it appears in the
            # whitespace-stripped source; any leading indentation that nested
            # the sub-items of rule 3 may have been lost -- confirm.
            how_to = '''1. 游戏的目标是吃掉对方的棋子
开局双方各有四枚棋子,被吃剩一枚棋子即可判负
2. 棋子可以向上下左右移动到空白位置
3. 如何吃子
1. 移动后的棋子需要跟本方其他某个棋子,在水平方向,或者垂直方向上连住
2. 如果连住后的棋子两端有对方一个棋子,那么这个对方的棋子就被吃掉
3. 如果对方也有两颗棋子,则互相不吃
4. 如果落子在对方连起来的两枚棋子一端,也不会被吃
5. 如果选择 AI vs AI,需要手动点击比分下面的回合按钮,触发下一次落子
'''
            st.markdown(how_to)


    # Header row: ❌ on the left, score in the middle, ⭕ on the right.
    xp, score, op = st.columns([2, 8, 2])
    for i, p in enumerate([xp, op]):
        p.title(players[i]["name"], anchor=False)
        caption = st.session_state.player_settings[i]["pt"]
        if st.session_state.player_settings[i]["pt"] == player_types[0]:
            caption = st.session_state.player_settings[i]["ai_model"] + " " + caption
        p.caption(caption)

    # st.session_state

    # The AI moves during the rerun itself, before the board is drawn, so the
    # board below already shows its move.
    if st.session_state.player_settings[player_to_index(st.session_state.player)]['pt'] != "Human" and st.session_state.winner is None:
        ai_step()

    st.divider()

    # One row of buttons per board row; the outer wide columns only pad the
    # four cell columns towards the centre.
    for i, row in enumerate(st.session_state.board):
        cols = st.columns([5, 1, 1, 1, 1, 5])
        for j, field in enumerate(row):
            cols[j + 1].button(
                square_content[field],
                key=f"{i}-{j}",
                type=f'{"primary" if check_clicked(i, j) else "secondary"}',
                on_click=human_step,
                args=(i, j),
            )

    s = score.columns([2, 2, 2])
    s[1].title(f'{st.session_state.win[-1]} : {st.session_state.win[1]}', anchor=False)
    # The button has no callback; pressing it only triggers a rerun, which is
    # what advances an AI-vs-AI game by one move.
    s[1].button(
        f'{"❌" if st.session_state.player == -1 else "⭕"}\'s turn'
        if not st.session_state.winner
        else f'🏁 Game finished'
    )
|
240 |
+
|
241 |
+
|
242 |
+
|
243 |
+
# Render the page when the file is executed as a script.
if __name__ == '__main__':
    main()
|
models/firstBlood.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:944608ae57a59c24f3c5ab6ed4cb7f387903154bf97d6a57cbf207747a55f304
|
3 |
+
size 39033691
|
models/random.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ac3537ec1644bf08ae41488d7a98fdcf1637ff6009de01d36b1edb4cc0e8c53
|
3 |
+
size 39033691
|
models/super.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e24aa62ce56a68e74d78593e487d38a4a6a65917b05f8a684a33274072edf4a0
|
3 |
+
size 39034331
|
no_one/NoOneGame.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import print_function
|
2 |
+
import sys
|
3 |
+
sys.path.append('..')
|
4 |
+
from Game import Game
|
5 |
+
from .NoOneLogic import Board
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
class NoOneGame(Game):
    """
    Game adapter exposing the rules implemented by NoOneLogic.Board through
    the generic Game interface (two-player, adversarial, turn-based).

    Use 1 for player1 and -1 for player2.

    An action is an integer in [0, 4*n*n) that packs a board position and one
    of four move directions: ``n*n*direction + n*pos[1] + pos[0]`` (see
    encodeAction / decodeAction).
    """

    square_content = {
        -1: "X",
        +0: "-",
        +1: "O"
    }

    def __init__(self, n):
        """
        Input:
            n: side length of the square board
        """
        self.n = n
        self.steps = 0

    def reach_max_steps(self):
        """
        Returns:
            True once 500 or more moves have been applied via getNextState
            since the last reset_steps().
        """
        return self.steps >= 500

    def reset_steps(self):
        """Reset the move counter used by reach_max_steps()."""
        self.steps = 0

    def getInitBoard(self):
        """
        Returns:
            startBoard: a representation of the board (ideally this is the form
                        that will be the input to your neural network)
        """
        b = Board(self.n)
        return np.array(b.pieces)

    def getBoardSize(self):
        """
        Returns:
            (x,y): a tuple of board dimensions
        """
        return (self.n, self.n)

    def getActionSize(self):
        """
        Returns:
            actionSize: number of all possible actions (4 directions per cell)
        """
        return self.n * self.n * 4

    def decodeAction(self, action):
        """
        Input:
            action: integer produced by encodeAction

        Returns:
            ([x, y], i): the position and the direction index
        """
        cells = self.n * self.n
        i = action // cells
        x = (action % cells) % self.n
        y = (action % cells) // self.n
        return ([x, y], i)

    def encodeAction(self, pos, i):
        """
        Input:
            pos: position as a 2-element sequence
            i: direction index (0..3)

        Returns:
            action: integer index into the policy vector
        """
        return self.n*self.n*i + self.n*pos[1] + pos[0]

    def getNextState(self, board, player, action):
        """
        Input:
            board: current board
            player: current player (1 or -1)
            action: action taken by current player

        Returns:
            nextBoard: board after applying action
            nextPlayer: player who plays in the next turn (should be -player)
        """
        move = self.decodeAction(action)

        # Work on a copy so the caller's board is left untouched.
        b = Board(self.n)
        b.pieces = np.copy(board)
        b.execute_move(move, player)
        self.steps += 1
        return (b.pieces, -player)

    def getValidMoves(self, board, player):
        """
        Input:
            board: current board
            player: current player

        Returns:
            validMoves: a binary vector of length self.getActionSize(), 1 for
                        moves that are valid from the current board and player,
                        0 for invalid moves
        """
        valids = [0]*self.getActionSize()
        b = Board(self.n)
        b.pieces = np.copy(board)
        for (pos, i) in b.get_legal_moves(player):
            valids[self.encodeAction(pos, i)] = 1
        return np.array(valids)

    def getGameEnded(self, board, player):
        """
        Input:
            board: current board
            player: current player (1 or -1)

        Returns:
            r: 0 if game has not ended. 1 if player won, -1 if player lost.
               A side loses once it has one piece or fewer left; the current
               player's own count is checked first.
        """
        b = Board(self.n)
        b.pieces = np.copy(board)

        if b.count(player) <= 1:
            return -1
        if b.count(-player) <= 1:
            return 1

        # NOTE: no step-limit draw is applied here; reach_max_steps() is
        # available but not consulted.
        return 0

    def getCanonicalForm(self, board, player):
        """
        Input:
            board: current board
            player: current player (1 or -1)

        Returns:
            canonicalBoard: the board from the point of view of `player`,
                            obtained by multiplying every cell by `player`
                            so the side to move is always encoded as 1.
        """
        return player*board

    def getSymmetries(self, board, pi):
        """
        Input:
            board: current board
            pi: policy vector of size self.getActionSize()

        Returns:
            symmForms: a list of [(board,pi)] where each tuple is a symmetrical
                       form of the board and the corresponding pi vector. This
                       is used when training the neural network from examples.
                       Order: for each of 1..4 quarter turns, the mirrored
                       form first, then the plain rotation.
        """
        assert len(pi) == self.n**2*4
        # encodeAction lays pi out as [direction][pos[1]][pos[0]]; swap the two
        # spatial axes so they line up with board[pos[0]][pos[1]].
        pi_board = np.reshape(pi, (4, self.n, self.n)).transpose(0, 2, 1)
        symmetries = []

        for i in range(1, 5):
            for j in [True, False]:
                newB = np.rot90(board, i)
                # Rotate only the spatial axes, then shift the direction axis:
                # each quarter turn maps down->right->up->left->down, i.e.
                # direction d becomes (d + 1) % 4.
                newPi = np.roll(np.rot90(pi_board, i, axes=(1, 2)), i, axis=0)
                if j:
                    newB = np.fliplr(newB)
                    # Mirroring columns swaps 'right' (1) and 'left' (3).
                    newPi = np.flip(newPi, axis=2)[[0, 3, 2, 1]]
                # Back to the encodeAction layout before flattening.
                symmetries.append((newB, list(newPi.transpose(0, 2, 1).ravel())))
        return symmetries

    def stringRepresentation(self, board):
        """
        Input:
            board: current board

        Returns:
            boardString: the raw bytes of the board array.
                         Required by MCTS for hashing.
        """
        # ndarray.tostring() was deprecated and removed in NumPy 2.0;
        # tobytes() returns the same bytes.
        return board.tobytes()

    @staticmethod
    def display(board):
        """Print the board to stdout with row and column indices."""
        n = board.shape[0]
        print(" ", end="")
        for y in range(n):
            print(y, end=" ")
        print("")
        print("-----------------------")
        for y in range(n):
            print(y, "|", end="")    # print the row #
            for x in range(n):
                piece = board[y][x]    # get the piece to print
                print(NoOneGame.square_content[piece], end=" ")
            print("|")

        print("-----------------------")
|
no_one/NoOneLogic.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class Board:
    """
    Square board for the game (1 for white, -1 for black, 0 for empty spaces).

    A move is ``((row, col), direction_index)``: the piece at (row, col) steps
    one cell in one of the four directions listed in ``__directions``.
    """

    # Index i in both lists refers to the same direction.
    __directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
    __readable_directions = ['down', 'right', 'up', 'left']

    @staticmethod
    def readableDirection(i):
        """Return the human-readable name of direction index `i`."""
        return Board.__readable_directions[i]

    def __init__(self, n):
        """Create an n x n board with 1s on the first row and -1s on the last."""
        self.n = n
        self.steps = 0
        self.max_steps = 1000
        self.pieces = [[0 for _ in range(n)] for _ in range(n)]
        # setup the initial board
        for i in range(n):
            self.pieces[0][i] = 1
            self.pieces[-1][i] = -1

    def count(self, color):
        """Return the number of cells holding `color`."""
        return sum(
            1
            for x in range(self.n)
            for y in range(self.n)
            if self.pieces[x][y] == color
        )

    def reach_max_steps(self):
        """Return True once `steps` has reached `max_steps` (also prints steps)."""
        print("steps:", self.steps)
        return self.steps >= self.max_steps

    def get_legal_moves(self, color):
        """Return all the legal moves for the given color.

        Each move is ``((row, col), direction_index)``.
        """
        return [
            ((i, j), m)
            for i in range(self.n)
            for j in range(self.n)
            if self.pieces[i][j] == color
            for m in self.get_legal_moves_from((i, j))
        ]

    def get_legal_moves_from(self, p):
        """Return the direction indices whose neighbouring cell of `p` is
        on the board and empty."""
        moves = []
        for i, direction in enumerate(self.__directions):
            next_p = self.next_position(p, direction)
            if next_p is None:
                continue
            if self.pieces[next_p[0]][next_p[1]] == 0:
                moves.append(i)
        return moves

    def execute_move(self, move, color):
        """Apply `move` for `color`, resolve captures, and wipe the opponent
        if it is left without any legal move.

        Raises:
            ValueError: if the destination is off the board or not empty.
        """
        (i, j), direction = move
        target = self.next_position((i, j), self.__directions[direction])
        # An off-board destination is reported the same way as an occupied
        # one instead of failing while unpacking None.
        if target is None or self.pieces[target[0]][target[1]] != 0:
            raise ValueError(f'Invalid move: {move}')
        (x, y) = target
        # Count the step only once the move is known to be valid.
        self.steps += 1
        self.pieces[i][j] = 0
        self.pieces[x][y] = color
        self.capture_pieces((x, y), color)

        if len(self.get_legal_moves(-color)) == 0:
            self.remove_all(-color)

    def remove_all(self, color):
        """Clear every cell that holds `color`."""
        for i in range(self.n):
            for j in range(self.n):
                if self.pieces[i][j] == color:
                    self.pieces[i][j] = 0

    def capture_pieces(self, pos, color):
        """Resolve captures around the piece that just moved.

        pos: current position of piece moved
        """
        for d in self.__directions:
            npos = self.next_position(pos, d)
            if npos is None:
                continue
            (x, y) = npos
            if self.pieces[x][y] == color:
                # two pieces of the same color form a line along d: try to
                # capture just beyond each end of that pair
                self.capture_piece(npos, d, color)
                self.capture_piece(pos, (-d[0], -d[1]), color)

    def capture_piece(self, pos, direction, color):
        """Looking from `pos` along `direction`: if the next cell holds an
        opposing piece it is captured, unless the cell after it holds another
        opposing piece (two in a row cannot be captured)."""
        adjacent = self.next_position(pos, direction)
        if adjacent is None:
            return
        if self.pieces[adjacent[0]][adjacent[1]] != -color:
            return
        beyond = self.next_position(adjacent, direction)
        if beyond is not None:
            if self.pieces[beyond[0]][beyond[1]] == -color:
                return
        self.pieces[adjacent[0]][adjacent[1]] = 0

    def next_position(self, pos, direction):
        """Return `pos` shifted by `direction`, or None if that leaves the board."""
        x, y = pos[0] + direction[0], pos[1] + direction[1]
        if x < 0 or x >= self.n or y < 0 or y >= self.n:
            return None
        return (x, y)
|
107 |
+
|
no_one/NoOnePlayes.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
from .NoOneLogic import Board
|
4 |
+
|
5 |
+
class RandomPlayer():
    """Player that picks uniformly at random among the currently valid actions."""

    def __init__(self, game):
        self.game = game

    def play(self, board):
        """Return a random valid action index for player 1 on `board`.

        Raises:
            ValueError: if the game reports no valid action (the previous
                rejection-sampling loop would never terminate in that case).
        """
        valids = self.game.getValidMoves(board, 1)
        # Sample directly from the valid indices instead of redrawing until
        # a valid one comes up; the resulting distribution is the same.
        candidates = np.flatnonzero(np.asarray(valids) == 1)
        if candidates.size == 0:
            raise ValueError('No valid moves available')
        return int(np.random.choice(candidates))
|
15 |
+
|
16 |
+
|
17 |
+
class HumanPlayer():
    """Player that reads moves typed on stdin as ``x y direction``."""

    def __init__(self, game):
        self.game = game

    def play(self, board):
        """List the valid moves, then prompt until a valid one is entered.

        Returns:
            The chosen action index (as produced by game.encodeAction).
        """
        valid = self.game.getValidMoves(board, 1)
        for action, is_valid in enumerate(valid):
            if is_valid:
                (pos, di) = self.game.decodeAction(action)
                print("[", pos[0], pos[1], "%s(%s)" % (Board.readableDirection(di), di), end="] ")
        while True:
            a = self._parse_move(input())
            if a is not None and valid[a]:
                return a
            print('Invalid move')

    def _parse_move(self, text):
        """Turn one input line into an action index, or None if malformed."""
        # split() without arguments tolerates repeated or trailing whitespace.
        fields = text.split()
        if len(fields) != 3:
            return None
        try:
            x, y, di = [int(f) for f in fields]
        except ValueError:
            # Input needs to be three integers
            return None
        n = self.game.n
        if 0 <= x < n and 0 <= y < n and 0 <= di < 4:
            return self.game.encodeAction([x, y], di)
        return None
|
no_one/__init__.py
ADDED
File without changes
|
no_one/pytorch/NNet.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import time
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
from tqdm import tqdm
|
7 |
+
|
8 |
+
sys.path.append('../../')
|
9 |
+
from utils import *
|
10 |
+
from NeuralNet import NeuralNet
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import torch.optim as optim
|
14 |
+
|
15 |
+
from .NoOneNNet import NoOneNNet as onnet
|
16 |
+
|
17 |
+
# Module-level hyper-parameters read by NNetWrapper and passed to the network.
args = dotdict({
    'lr': 0.001,  # learning rate
    'dropout': 0.3,  # dropout probability
    'epochs': 10,  # passes over the examples per train() call
    'batch_size': 64,  # examples sampled per optimisation step
    'cuda': torch.cuda.is_available(),  # use the GPU whenever one is visible
    'num_channels': 512,  # channel count handed to the network
})
|
25 |
+
|
26 |
+
|
27 |
+
class NNetWrapper(NeuralNet):
    """Training / inference / checkpoint wrapper around the policy-value net."""

    def __init__(self, game):
        self.nnet = onnet(game, args)
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()

        if args.cuda:
            self.nnet.cuda()

    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        # Use the configured learning rate rather than relying on Adam's default.
        optimizer = optim.Adam(self.nnet.parameters(), lr=args.lr)

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()

            batch_count = len(examples) // args.batch_size

            t = tqdm(range(batch_count), desc='Training Net')
            for _ in t:
                # Each batch is drawn with replacement from all examples.
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards = boards.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))
                t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

    def predict(self, board):
        """
        board: np array with board

        Returns:
            (pi, v): the policy as probabilities (the net emits
            log-probabilities, hence the exp) and the value estimate, both as
            numpy arrays for the single input board.
        """
        # preparing input
        board = torch.FloatTensor(board.astype(np.float64))
        if args.cuda:
            board = board.contiguous().cuda()
        board = board.view(1, self.board_x, self.board_y)
        self.nnet.eval()
        with torch.no_grad():
            pi, v = self.nnet(board)

        return torch.exp(pi).detach().cpu().numpy()[0], v.detach().cpu().numpy()[0]

    def loss_pi(self, targets, outputs):
        """Cross-entropy between target policy and predicted log-policy,
        averaged over the batch."""
        return -torch.sum(targets * outputs) / targets.size()[0]

    def loss_v(self, targets, outputs):
        """Mean squared error between target and predicted values."""
        return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]

    def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
        """Save the network weights to folder/filename, creating folder if needed."""
        filepath = os.path.join(folder, filename)
        if not os.path.exists(folder):
            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
            # makedirs also handles nested folders, which os.mkdir cannot.
            os.makedirs(folder, exist_ok=True)
        else:
            print("Checkpoint Directory exists! ")
        torch.save({
            'state_dict': self.nnet.state_dict(),
        }, filepath)

    def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
        """Load network weights from folder/filename.

        Raises:
            FileNotFoundError: if the checkpoint file does not exist.
        """
        # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98
        filepath = os.path.join(folder, filename)
        if not os.path.exists(filepath):
            # Raising a bare string is itself a TypeError; raise a real exception.
            raise FileNotFoundError("No model in path {}".format(filepath))
        # Allow checkpoints saved on a GPU to be loaded on a CPU-only host.
        map_location = None if args.cuda else 'cpu'
        checkpoint = torch.load(filepath, map_location=map_location)
        self.nnet.load_state_dict(checkpoint['state_dict'])
|
no_one/pytorch/NoOneNNet.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('..')
|
3 |
+
from utils import *
|
4 |
+
|
5 |
+
import argparse
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
import torch.optim as optim
|
10 |
+
|
11 |
+
class NoOneNNet(nn.Module):
    """Convolutional policy/value network.

    Four 3x3 conv + batch-norm stages feed two fully connected layers, which
    branch into a log-softmax policy head and a tanh value head.
    """

    def __init__(self, game, args):
        # game params
        self.board_x, self.board_y = game.getBoardSize()
        self.action_size = game.getActionSize()
        self.args = args

        super(NoOneNNet, self).__init__()
        channels = args.num_channels

        # conv1-3 keep the spatial size (padding=1); conv4 has no padding and
        # trims one cell from every edge.
        self.conv1 = nn.Conv2d(1, channels, 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, 3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(channels, channels, 3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(channels, channels, 3, stride=1)

        self.bn1 = nn.BatchNorm2d(channels)
        self.bn2 = nn.BatchNorm2d(channels)
        self.bn3 = nn.BatchNorm2d(channels)
        self.bn4 = nn.BatchNorm2d(channels)

        self.fc1 = nn.Linear(channels*(self.board_x-2)*(self.board_y-2), 1024)
        self.fc_bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(1024, 512)
        self.fc_bn2 = nn.BatchNorm1d(512)

        # policy head
        self.fc3 = nn.Linear(512, self.action_size)

        # value head
        self.fc4 = nn.Linear(512, 1)

    def forward(self, s):
        """Map a batch of boards to (log-policy, value).

        s: batch_size x board_x x board_y
        Returns: (batch_size x action_size log-probabilities,
                  batch_size x 1 values in [-1, 1])
        """
        s = s.view(-1, 1, self.board_x, self.board_y)    # batch_size x 1 x board_x x board_y

        # After conv1-3: batch_size x num_channels x board_x x board_y
        # After conv4:   batch_size x num_channels x (board_x-2) x (board_y-2)
        conv_stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, bn in conv_stages:
            s = F.relu(bn(conv(s)))

        flat_features = self.args.num_channels*(self.board_x-2)*(self.board_y-2)
        s = s.view(-1, flat_features)

        drop = self.args.dropout
        s = F.relu(self.fc_bn1(self.fc1(s)))
        s = F.dropout(s, p=drop, training=self.training)    # batch_size x 1024
        s = F.relu(self.fc_bn2(self.fc2(s)))
        s = F.dropout(s, p=drop, training=self.training)    # batch_size x 512

        pi = self.fc3(s)    # batch_size x action_size
        v = self.fc4(s)     # batch_size x 1

        return F.log_softmax(pi, dim=1), torch.tanh(v)
|
no_one/pytorch/__init__.py
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==1.1.0
|
2 |
+
astunparse==1.6.3
|
3 |
+
cachetools==5.2.0
|
4 |
+
certifi==2022.5.18.1
|
5 |
+
charset-normalizer==2.0.12
|
6 |
+
coloredlogs==15.0.1
|
7 |
+
flatbuffers==1.12
|
8 |
+
gast==0.4.0
|
9 |
+
google-auth==2.8.0
|
10 |
+
google-auth-oauthlib==0.4.6
|
11 |
+
google-pasta==0.2.0
|
12 |
+
grpcio==1.46.3
|
13 |
+
h5py==3.7.0
|
14 |
+
humanfriendly==10.0
|
15 |
+
idna==3.3
|
16 |
+
importlib-metadata==4.11.4
|
17 |
+
keras==2.9.0
|
18 |
+
Keras-Preprocessing==1.1.2
|
19 |
+
libclang==14.0.1
|
20 |
+
Markdown==3.3.7
|
21 |
+
numpy
|
22 |
+
oauthlib==3.2.0
|
23 |
+
opt-einsum==3.3.0
|
24 |
+
packaging==21.3
|
25 |
+
protobuf==3.19.4
|
26 |
+
pyasn1==0.4.8
|
27 |
+
pyasn1-modules==0.2.8
|
28 |
+
pyparsing==3.0.9
|
29 |
+
requests==2.28.0
|
30 |
+
requests-oauthlib==1.3.1
|
31 |
+
rsa==4.8
|
32 |
+
six==1.16.0
|
33 |
+
tensorboard==2.9.1
|
34 |
+
tensorboard-data-server==0.6.1
|
35 |
+
tensorboard-plugin-wit==1.8.1
|
36 |
+
tensorflow==2.9.1
|
37 |
+
tensorflow-estimator==2.9.0
|
38 |
+
tensorflow-io-gcs-filesystem==0.26.0
|
39 |
+
termcolor==1.1.0
|
40 |
+
torch==1.11.0
|
41 |
+
tqdm==4.64.0
|
42 |
+
typing_extensions==4.2.0
|
43 |
+
urllib3==1.26.9
|
44 |
+
Werkzeug==2.1.2
|
45 |
+
wrapt==1.14.1
|
46 |
+
zipp==3.8.0
|
utils.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class AverageMeter(object):
    """From https://github.com/pytorch/examples/blob/master/imagenet/main.py

    Tracks the latest value together with a running weighted sum, count and
    average of everything passed to update().
    """

    def __init__(self):
        self.val = self.avg = self.sum = self.count = 0

    def __repr__(self):
        # Shown as the running average in scientific notation, two decimals.
        return format(self.avg, '.2e')

    def update(self, val, n=1):
        """Record `val` as observed `n` times and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
|
18 |
+
|
19 |
+
|
20 |
+
class dotdict(dict):
    """dict whose keys can also be read as attributes (``d.key``)."""

    def __getattr__(self, name):
        # __getattr__ is only consulted after normal attribute lookup fails.
        # A missing key must surface as AttributeError, otherwise hasattr(),
        # getattr(obj, name, default) and copy/pickle protocol probing break.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None
|