nullne committed
Commit eb34cec
1 parent: 71b7125

Add application file


it works

player settings

fix bug

refine the settings and add help doc

fix param

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pth.tar filter=lfs diff=lfs merge=lfs -text
Arena.py ADDED
@@ -0,0 +1,102 @@
+ import logging
+
+ from tqdm import tqdm
+
+ log = logging.getLogger(__name__)
+
+
+ class Arena():
+     """
+     An Arena class where any 2 agents can be pit against each other.
+     """
+
+     def __init__(self, player1, player2, game, display=None):
+         """
+         Input:
+             player1, player2: two functions that take a board as input and return an action
+             game: Game object
+             display: a function that takes a board as input and prints it (e.g.
+                      display in othello/OthelloGame). Necessary for verbose
+                      mode.
+
+         See othello/OthelloPlayers.py for an example. See pit.py for pitting
+         human players/other baselines against each other.
+         """
+         self.player1 = player1
+         self.player2 = player2
+         self.game = game
+         self.display = display
+
+     def playGame(self, verbose=False):
+         """
+         Executes one episode of a game.
+
+         Returns:
+             either
+                 winner: player who won the game (1 if player1, -1 if player2)
+             or
+                 draw result returned from the game that is neither 1, -1, nor 0.
+         """
+         players = [self.player2, None, self.player1]
+         curPlayer = 1
+         board = self.game.getInitBoard()
+         it = 0
+         while self.game.getGameEnded(board, curPlayer) == 0:
+             if it > 100:
+                 return 0.01
+             it += 1
+             if verbose:
+                 assert self.display
+                 print("Turn ", str(it), "Player ", str(curPlayer))
+                 self.display(board)
+             action = players[curPlayer + 1](self.game.getCanonicalForm(board, curPlayer))
+
+             valids = self.game.getValidMoves(self.game.getCanonicalForm(board, curPlayer), 1)
+
+             if valids[action] == 0:
+                 log.error(f'Action {action} is not valid!')
+                 log.debug(f'valids = {valids}')
+                 assert valids[action] > 0
+             board, curPlayer = self.game.getNextState(board, curPlayer, action)
+         if verbose:
+             assert self.display
+             print("Game over: Turn ", str(it), "Result ", str(self.game.getGameEnded(board, 1)))
+             self.display(board)
+         return curPlayer * self.game.getGameEnded(board, curPlayer)
+
+     def playGames(self, num, verbose=False):
+         """
+         Plays num games in which player1 starts num/2 games and player2 starts
+         num/2 games.
+
+         Returns:
+             oneWon: games won by player1
+             twoWon: games won by player2
+             draws: games won by neither player
+         """
+
+         num = int(num / 2)
+         oneWon = 0
+         twoWon = 0
+         draws = 0
+         for _ in tqdm(range(num), desc="Arena.playGames (1)"):
+             gameResult = self.playGame(verbose=verbose)
+             if gameResult == 1:
+                 oneWon += 1
+             elif gameResult == -1:
+                 twoWon += 1
+             else:
+                 draws += 1
+
+         self.player1, self.player2 = self.player2, self.player1
+
+         for _ in tqdm(range(num), desc="Arena.playGames (2)"):
+             gameResult = self.playGame(verbose=verbose)
+             if gameResult == -1:
+                 oneWon += 1
+             elif gameResult == 1:
+                 twoWon += 1
+             else:
+                 draws += 1
+
+         return oneWon, twoWon, draws
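
pit.py, referenced in the docstring above, is not included in this commit. A minimal sketch of how the pieces added here could be wired together (it assumes the LFS checkpoints under ./models/ load correctly; none of this code is part of the commit):

    import numpy as np

    from Arena import Arena
    from MCTS import MCTS
    from no_one.NoOneGame import NoOneGame
    from no_one.NoOnePlayes import RandomPlayer
    from no_one.pytorch.NNet import NNetWrapper as NNet
    from utils import dotdict

    game = NoOneGame(4)

    nnet = NNet(game)
    nnet.load_checkpoint('./models/', 'super.pth.tar')
    mcts = MCTS(game, nnet, dotdict({'numMCTSSims': 50, 'cpuct': 1.0}))
    ai = lambda board: np.argmax(mcts.getActionProb(board, temp=0))

    arena = Arena(ai, RandomPlayer(game).play, game, display=NoOneGame.display)
    print(arena.playGames(10))  # -> (oneWon, twoWon, draws)
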
Game.py ADDED
@@ -0,0 +1,113 @@
+ class Game():
+     """
+     This class specifies the base Game class. To define your own game, subclass
+     this class and implement the functions below. This works when the game is
+     two-player, adversarial and turn-based.
+
+     Use 1 for player1 and -1 for player2.
+
+     See othello/OthelloGame.py for an example implementation.
+     """
+     def __init__(self):
+         pass
+
+     def getInitBoard(self):
+         """
+         Returns:
+             startBoard: a representation of the board (ideally this is the form
+                         that will be the input to your neural network)
+         """
+         pass
+
+     def getBoardSize(self):
+         """
+         Returns:
+             (x,y): a tuple of board dimensions
+         """
+         pass
+
+     def getActionSize(self):
+         """
+         Returns:
+             actionSize: number of all possible actions
+         """
+         pass
+
+     def getNextState(self, board, player, action):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+             action: action taken by current player
+
+         Returns:
+             nextBoard: board after applying action
+             nextPlayer: player who plays in the next turn (should be -player)
+         """
+         pass
+
+     def getValidMoves(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player
+
+         Returns:
+             validMoves: a binary vector of length self.getActionSize(), 1 for
+                         moves that are valid from the current board and player,
+                         0 for invalid moves
+         """
+         pass
+
+     def getGameEnded(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+
+         Returns:
+             r: 0 if game has not ended. 1 if player won, -1 if player lost,
+                small non-zero value for a draw.
+
+         """
+         pass
+
+     def getCanonicalForm(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+
+         Returns:
+             canonicalBoard: returns canonical form of board. The canonical form
+                             should be independent of player. For example, in chess,
+                             the canonical form can be chosen to be from the pov
+                             of white. When the player is white, we can return
+                             board as is. When the player is black, we can invert
+                             the colors and return the board.
+         """
+         pass
+
+     def getSymmetries(self, board, pi):
+         """
+         Input:
+             board: current board
+             pi: policy vector of size self.getActionSize()
+
+         Returns:
+             symmForms: a list of [(board,pi)] where each tuple is a symmetrical
+                        form of the board and the corresponding pi vector. This
+                        is used when training the neural network from examples.
+         """
+         pass
+
+     def stringRepresentation(self, board):
+         """
+         Input:
+             board: current board
+
+         Returns:
+             boardString: a quick conversion of board to a string format.
+                          Required by MCTS for hashing.
+         """
+         pass
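
The docstrings above define the contract every game implementation must satisfy. A generic driver loop over this interface would look roughly like the sketch below (g and pick_action are placeholder names, not part of the commit):

    def play_to_the_end(g, pick_action):
        board, player = g.getInitBoard(), 1
        while g.getGameEnded(board, player) == 0:
            canonical = g.getCanonicalForm(board, player)
            valids = g.getValidMoves(canonical, 1)
            action = pick_action(canonical, valids)   # must return an index with valids[action] == 1
            board, player = g.getNextState(board, player, action)
        return g.getGameEnded(board, 1)               # 1, -1, or a small non-zero draw value
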
MCTS.py ADDED
@@ -0,0 +1,256 @@
+ import logging
+ import math
+
+ import numpy as np
+
+
+ # from no_one.NoOnePlayes import HumanPlayer
+
+ EPS = 1e-8
+
+ log = logging.getLogger(__name__)
+
+
+ class MCTS():
+     """
+     This class handles the MCTS tree.
+     """
+
+     def __init__(self, game, nnet, args):
+         self.game = game
+         self.nnet = nnet
+         self.args = args
+         self.Qsa = {}  # stores Q values for s,a (as defined in the paper)
+         self.Nsa = {}  # stores #times edge s,a was visited
+         self.Ns = {}   # stores #times board s was visited
+         self.Ps = {}   # stores initial policy (returned by neural net)
+
+         self.Es = {}   # stores game.getGameEnded for board s
+         self.Vs = {}   # stores game.getValidMoves for board s
+
+     def getActionProb(self, canonicalBoard, temp=1):
+         """
+         This function performs numMCTSSims simulations of MCTS starting from
+         canonicalBoard.
+
+         Returns:
+             probs: a policy vector where the probability of the ith action is
+                    proportional to Nsa[(s,a)]**(1./temp)
+         """
+         for _ in range(self.args.numMCTSSims):
+             # self.search(canonicalBoard)
+             self.game.reset_steps()
+             self.search(canonicalBoard)
+
+         s = self.game.stringRepresentation(canonicalBoard)
+         counts = [self.Nsa[(s, a)] if (s, a) in self.Nsa else 0 for a in range(self.game.getActionSize())]
+
+         if temp == 0:
+             bestAs = np.array(np.argwhere(counts == np.max(counts))).flatten()
+             bestA = np.random.choice(bestAs)
+             probs = [0] * len(counts)
+             probs[bestA] = 1
+             return probs
+
+         counts = [x ** (1. / temp) for x in counts]
+         counts_sum = float(sum(counts))
+         if counts_sum == 0:
+             # degenerate case: no visits were recorded, the division below will fail
+             print(len(counts))
+         probs = [x / counts_sum for x in counts]
+         return probs
+
+     def search_iterate(self, canonicalBoard):
+         stack = [(0, (canonicalBoard,))]  # stack of (stage, payload): stage 0 expands a node, stage 1 backpropagates an edge (s, a)
+         results = []  # values propagated back from leaf or terminal nodes
+
+         while stack:
+             st, sv = stack.pop()
+             if st == 0:
+                 result, ns = self.search_iterate_st0(sv[0])
+                 if result is not None:
+                     results.append(result)
+                 if ns is not None:
+                     stack.append((1, (ns[1], ns[2])))
+                     stack.append((0, (ns[0],)))
+             elif st == 1:
+                 v = results.pop()
+                 v = self.search_iterate_update(v, sv[0], sv[1])
+                 results.append(v)
+             else:
+                 raise ValueError("Invalid state")
+         return results.pop()
+
+     def search_iterate_st0(self, canonicalBoard):
+         s = self.game.stringRepresentation(canonicalBoard)
+
+         if s not in self.Es:
+             self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
+         if self.Es[s] != 0:
+             result = -self.Es[s]
+             return result, None
+         if s not in self.Ps:
+             # leaf node
+             self.Ps[s], v = self.nnet.predict(canonicalBoard)
+             valids = self.game.getValidMoves(canonicalBoard, 1)
+             self.Ps[s] = self.Ps[s] * valids
+             sum_Ps_s = np.sum(self.Ps[s])
+             if sum_Ps_s > 0:
+                 self.Ps[s] /= sum_Ps_s
+             else:
+                 self.Ps[s] = self.Ps[s] + valids
+                 self.Ps[s] /= np.sum(self.Ps[s])
+
+             self.Vs[s] = valids
+             self.Ns[s] = 0
+
+             return -v, None
+
+         valids = self.Vs[s]
+         cur_best = -float('inf')
+         best_act = -1
+
+         for a in range(self.game.getActionSize()):
+             if valids[a]:
+                 if (s, a) in self.Qsa:
+                     u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (1 + self.Nsa[(s, a)])
+                 else:
+                     u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)
+
+                 if u > cur_best:
+                     cur_best = u
+                     best_act = a
+
+         next_s, next_player = self.game.getNextState(canonicalBoard, 1, best_act)
+         next_s = self.game.getCanonicalForm(next_s, next_player)
+
+         return None, (next_s, s, best_act)
+
+         # index = len(results)  # Current index in results
+         # stack.append((next_s, depth + 1, index))
+
+         # # Backpropagate results
+         # for v, parent_index in reversed(results):
+         #     if parent_index is not None:
+         #         parent_v, _ = results[parent_index]
+         #         results[parent_index] = ((parent_v * self.Ns[s] + v) / (self.Ns[s] + 1), _)
+         #     self.Ns[s] += 1
+
+         # # Update Qsa and Nsa based on backpropagation
+         # for i, (v, parent_index) in enumerate(results):
+         #     if parent_index is not None:  # Ignore root
+         #         _, action = stack[i]  # Assuming we also pushed actions to stack
+         #         if (s, action) in self.Qsa:
+         #             self.Qsa[(s, action)] = (self.Nsa[(s, action)] * self.Qsa[(s, action)] + v) / (self.Nsa[(s, action)] + 1)
+         #             self.Nsa[(s, action)] += 1
+         #         else:
+         #             self.Qsa[(s, action)] = v
+         #             self.Nsa[(s, action)] = 1
+
+         # return -results[0][0]  # Return the negated value of the root node
+
+     def search_iterate_update(self, v, s, a):
+         if (s, a) in self.Qsa:
+             self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
+             self.Nsa[(s, a)] += 1
+
+         else:
+             self.Qsa[(s, a)] = v
+             self.Nsa[(s, a)] = 1
+
+         self.Ns[s] += 1
+         return -v
+
+     def search(self, canonicalBoard, depth=0):
+         """
+         This function performs one iteration of MCTS. It is recursively called
+         till a leaf node is found. The action chosen at each node is one that
+         has the maximum upper confidence bound as in the paper.
+
+         Once a leaf node is found, the neural network is called to return an
+         initial policy P and a value v for the state. This value is propagated
+         up the search path. In case the leaf node is a terminal state, the
+         outcome is propagated up the search path. The values of Ns, Nsa, Qsa are
+         updated.
+
+         NOTE: the return values are the negative of the value of the current
+         state. This is done since v is in [-1,1] and if v is the value of a
+         state for the current player, then its value is -v for the other player.
+
+         Returns:
+             v: the negative of the value of the current canonicalBoard
+         """
+
+         s = self.game.stringRepresentation(canonicalBoard)
+
+         if s not in self.Es:
+             self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
+         if self.Es[s] != 0:
+             # terminal node
+             return -self.Es[s]
+
+         if s not in self.Ps:
+             # leaf node
+             self.Ps[s], v = self.nnet.predict(canonicalBoard)
+             valids = self.game.getValidMoves(canonicalBoard, 1)
+             self.Ps[s] = self.Ps[s] * valids  # masking invalid moves
+             sum_Ps_s = np.sum(self.Ps[s])
+             if sum_Ps_s > 0:
+                 self.Ps[s] /= sum_Ps_s  # renormalize
+             else:
+                 # if all valid moves were masked, make all valid moves equally probable
+
+                 # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've got overfitting or something else.
+                 # If you get dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
+                 log.error("All valid moves were masked, doing a workaround.")
+                 self.Ps[s] = self.Ps[s] + valids
+                 self.Ps[s] /= np.sum(self.Ps[s])
+
+             self.Vs[s] = valids
+             self.Ns[s] = 0
+             return -v
+
+         valids = self.Vs[s]
+         cur_best = -float('inf')
+         best_act = -1
+
+         # pick the action with the highest upper confidence bound
+         for a in range(self.game.getActionSize()):
+             if valids[a]:
+                 if (s, a) in self.Qsa:
+                     u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
+                             1 + self.Nsa[(s, a)])
+                 else:
+                     u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?
+
+                 if u > cur_best:
+                     cur_best = u
+                     best_act = a
+
+         a = best_act
+
+         if depth > 100:
+             candidates = self.game.getValidMoves(canonicalBoard, 1)
+             a = np.random.choice([i for i in range(len(candidates)) if candidates[i] == 1])
+             # self.game.display(canonicalBoard)
+             # human_player = HumanPlayer(self.game)
+             # a = human_player.play(canonicalBoard)
+             depth = 80
+
+
+         next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
+         next_s = self.game.getCanonicalForm(next_s, next_player)
+         # print("*", end="")
+         # self.game.display(next_s)
+
+         v = self.search(next_s, depth=depth + 1)
+
+         if (s, a) in self.Qsa:
+             self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
+             self.Nsa[(s, a)] += 1
+
+         else:
+             self.Qsa[(s, a)] = v
+             self.Nsa[(s, a)] = 1
+
+         self.Ns[s] += 1
+         return -v
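
The selection rule coded in search (and search_iterate_st0) is the PUCT bound u(s,a) = Q(s,a) + cpuct * P(s,a) * sqrt(N(s)) / (1 + N(s,a)). A tiny worked example with made-up numbers shows how a rarely visited action can win on the exploration term:

    import math

    cpuct = 1.0
    Ns = 25                                # visits to the parent state s
    # (name, Q, P, Nsa) for two candidate actions -- illustrative numbers only
    for name, Q, P, Nsa in [("a0", 0.10, 0.50, 10), ("a1", -0.05, 0.30, 1)]:
        u = Q + cpuct * P * math.sqrt(Ns) / (1 + Nsa)
        print(name, round(u, 3))           # a0 -> 0.327, a1 -> 0.7
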
NeuralNet.py ADDED
@@ -0,0 +1,50 @@
+ class NeuralNet():
+     """
+     This class specifies the base NeuralNet class. To define your own neural
+     network, subclass this class and implement the functions below. The neural
+     network does not consider the current player, and instead only deals with
+     the canonical form of the board.
+
+     See othello/NNet.py for an example implementation.
+     """
+
+     def __init__(self, game):
+         pass
+
+     def train(self, examples):
+         """
+         This function trains the neural network with examples obtained from
+         self-play.
+
+         Input:
+             examples: a list of training examples, where each example is of form
+                       (board, pi, v). pi is the MCTS-informed policy vector for
+                       the given board, and v is its value. The examples have the
+                       board in its canonical form.
+         """
+         pass
+
+     def predict(self, board):
+         """
+         Input:
+             board: current board in its canonical form.
+
+         Returns:
+             pi: a policy vector for the current board - a numpy array of length
+                 game.getActionSize
+             v: a float in [-1,1] that gives the value of the current board
+         """
+         pass
+
+     def save_checkpoint(self, folder, filename):
+         """
+         Saves the current neural network (with its parameters) in
+         folder/filename
+         """
+         pass
+
+     def load_checkpoint(self, folder, filename):
+         """
+         Loads parameters of the neural network from folder/filename
+         """
+         pass
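
A quick check of the predict contract, using the concrete PyTorch wrapper added later in this commit (a sketch; it assumes the checkpoint under ./models/ loads on the local setup):

    from no_one.NoOneGame import NoOneGame
    from no_one.pytorch.NNet import NNetWrapper

    game = NoOneGame(4)
    net = NNetWrapper(game)
    net.load_checkpoint('./models/', 'super.pth.tar')
    pi, v = net.predict(game.getInitBoard())
    print(pi.shape, float(v))   # (64,) -- n*n*4 actions -- and a value in [-1, 1]
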
app.py ADDED
@@ -0,0 +1,244 @@
+ import streamlit as st
+ import numpy as np
+
+ import Arena
+
+ from MCTS import MCTS
+
+ from no_one.NoOneGame import NoOneGame
+
+
+ from no_one.pytorch.NNet import NNetWrapper as NNet
+ from utils import *
+
+ import time
+
+
+ square_content = {
+     -1: "❌",
+     +0: "·",
+     +1: "⭕"
+ }
+
+ players = [{"name": "❌"}, {"name": "⭕"}]
+ player_types = [":rainbow[AI]", "Human"]
+ ai_ranks = ["Super", "First Blood", "Random"]
+ ai_models = {
+     "Super": "super.pth.tar",
+     "First Blood": "firstBlood.pth.tar",
+     "Random": "random.pth.tar",
+ }
+
+ game = NoOneGame(4)
+
+ def check_clicked(i, j):
+     if st.session_state.clicked == (i, j):
+         return True
+     return False
+
+ class HumanPlayer:
+     def __init__(self, game):
+         self.game = game
+     def encode(self, src, target):
+         row, col = src[0] - target[0], src[1] - target[1]
+         if (row, col) == (0, -1):
+             d = 1
+         elif (row, col) == (0, 1):
+             d = 3
+         elif (row, col) == (-1, 0):
+             d = 0
+         elif (row, col) == (1, 0):
+             d = 2
+         else:
+             return None
+         return self.game.encodeAction(src, d)
+     def play(self, board, player, src, target):
+         action = self.encode(src, target)
+         if action is None:
+             return None
+         valids = self.game.getValidMoves(board, player)
+         if valids[action] != 1:
+             return None
+         return action
+
+
+ def ai_player(ai_model):
+     n1 = NNet(game)
+     n1.load_checkpoint('./models/', ai_models[ai_model])
+     args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
+     mcts1 = MCTS(game, n1, args1)
+     return lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
+
+
+ def human_step(i, j):
+     if st.session_state.clicked is None:
+         if st.session_state.board[i, j] != st.session_state.player:
+             st.toast('Invalid move!')
+             return
+         st.session_state.clicked = (i, j)
+         return
+     elif st.session_state.clicked == (i, j):
+         # selection canceled
+         st.session_state.clicked = None
+         return
+     elif st.session_state.board[i, j] == st.session_state.player:
+         st.session_state.clicked = (i, j)
+         return
+     else:
+         # move
+         player = st.session_state.players[st.session_state.player + 1]
+         action = player.play(st.session_state.board, st.session_state.player, st.session_state.clicked, (i, j))
+         if action is None:
+             st.toast('Invalid move!')
+             return
+         move(action)
+         st.session_state.clicked = None
+
+
+ def ai_step():
+     p = st.session_state.player
+     action = st.session_state.players[p+1](game.getCanonicalForm(st.session_state.board, st.session_state.player))
+     # st.session_state.board, st.session_state.player = game.getNextState(
+     #     st.session_state.board, p, action,
+     # )
+     move(action)
+
+
+ def move(action):
+     p = st.session_state.player
+     st.session_state.board, st.session_state.player = game.getNextState(
+         st.session_state.board, p, action,
+     )
+
+     res = game.getGameEnded(st.session_state.board, p)
+     if res != 0:
+         st.session_state.winner = res*p
+         st.session_state.win[res*p] += 1
+         st.balloons()
+
+ def reinit():
+     st.session_state.reinit = True
+
+ def init(post_init=False):
+     st.session_state.reinit = False
+     if not post_init:
+         st.session_state.win = {-1: 0, 1: 0}
+
+     st.session_state.board = game.getInitBoard()
+     st.session_state.player = -1
+     st.session_state.players = [None for _ in range(2)]
+     for i, setting in enumerate(st.session_state.player_settings):
+         if setting["pt"] == player_types[1]:
+             st.session_state.players[i] = HumanPlayer(game)
+         else:
+             st.session_state.players[i] = ai_player(setting["ai_model"])
+     st.session_state.players.insert(1, None)
+
+     st.session_state.winner = None
+     st.session_state.clicked = None
+
+
+ def player_to_index(p):
+     if p == -1:
+         return 0
+     elif p == 1:
+         return 1
+
+
+ def main():
+     if "player_settings" not in st.session_state:
+         st.session_state.player_settings = [
+             {"pt": player_types[1], "ai_model": ai_ranks[0]},
+             {"pt": player_types[0], "ai_model": ai_ranks[1]},
+         ]
+     if "reinit" not in st.session_state:
+         st.session_state.reinit = False
+
+     fire, settings = st.columns([1, 2])
+     fire.button('New Game', on_click=init, args=(True,))
+     with settings.expander(
+         'Settings',
+         expanded=False,
+     ):
+         st.warning('Changing any setting will restart the game immediately', icon="⚠️")
+         for i, p in enumerate(st.columns([0.5, 0.5])):
+             with p:
+                 # players[i]
+                 st.session_state.player_settings[i]["pt"] = st.radio(
+                     "Who will play %s" % players[i]["name"],
+                     player_types,
+                     key=f"xp_type_{i}",
+                     horizontal=True,
+                     index=player_types.index(st.session_state.player_settings[i]["pt"]),
+                     on_change=reinit,
+                 )
+                 if st.session_state.player_settings[i]["pt"] == player_types[0]:
+                     st.session_state.player_settings[i]["ai_model"] = st.radio(
+                         "AI rank",
+                         ai_ranks,
+                         key=f"xp_rank_{i}",
+                         index=ai_ranks.index(st.session_state.player_settings[i]["ai_model"]),
+                         on_change=reinit,
+                     )
+
+         st.divider()
+         st.checkbox("Show me how to play", key="show_how_to_play")
+
+
+     if "board" not in st.session_state or st.session_state.reinit:
+         init()
+
+     if st.session_state.show_how_to_play:
+         with st.sidebar:
+             st.title("How to play")
+             how_to = '''1. The goal of the game is to capture your opponent's pieces.
+             Each side starts with four pieces; a side that is reduced to a single piece loses.
+             2. A piece may move up, down, left or right into an empty square.
+             3. How captures work
+                 1. The piece you just moved must line up with another of your own pieces, horizontally or vertically.
+                 2. If a single opposing piece sits directly at either end of that connected pair, it is captured.
+                 3. If the opponent also has two connected pieces there, neither side captures anything.
+                 4. Moving your piece onto the end of an opponent's connected pair does not get it captured.
+                 5. In AI vs AI mode, click the turn button below the score to trigger the next move.
+             '''
+             st.markdown(how_to)
+
+
+     xp, score, op = st.columns([2, 8, 2])
+     for i, p in enumerate([xp, op]):
+         p.title(players[i]["name"], anchor=False)
+         caption = st.session_state.player_settings[i]["pt"]
+         if st.session_state.player_settings[i]["pt"] == player_types[0]:
+             caption = st.session_state.player_settings[i]["ai_model"] + " " + caption
+         p.caption(caption)
+
+     # st.session_state
+
+     if st.session_state.player_settings[player_to_index(st.session_state.player)]['pt'] != "Human" and st.session_state.winner is None:
+         ai_step()
+
+     st.divider()
+
+     for i, row in enumerate(st.session_state.board):
+         cols = st.columns([5, 1, 1, 1, 1, 5])
+         for j, field in enumerate(row):
+             cols[j + 1].button(
+                 square_content[field],
+                 key=f"{i}-{j}",
+                 type=f'{"primary" if check_clicked(i, j) else "secondary"}',
+                 on_click=human_step,
+                 args=(i, j),
+             )
+
+     s = score.columns([2, 2, 2])
+     s[1].title(f'{st.session_state.win[-1]} : {st.session_state.win[1]}', anchor=False)
+     s[1].button(
+         f'{"❌" if st.session_state.player == -1 else "⭕"}\'s turn'
+         if not st.session_state.winner
+         else '🏁 Game finished'
+     )
+
+
+
+ if __name__ == '__main__':
+     main()
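
Note on running the UI: streamlit is imported at the top of app.py but is not pinned in requirements.txt below, so it has to be installed separately (e.g. pip install streamlit) before launching the app with streamlit run app.py from the repository root.
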
models/firstBlood.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:944608ae57a59c24f3c5ab6ed4cb7f387903154bf97d6a57cbf207747a55f304
+ size 39033691
models/random.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ac3537ec1644bf08ae41488d7a98fdcf1637ff6009de01d36b1edb4cc0e8c53
+ size 39033691
models/super.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e24aa62ce56a68e74d78593e487d38a4a6a65917b05f8a684a33274072edf4a0
+ size 39034331
no_one/NoOneGame.py ADDED
@@ -0,0 +1,206 @@
+ from __future__ import print_function
+ import sys
+ sys.path.append('..')
+ from Game import Game
+ from .NoOneLogic import Board
+ import numpy as np
+
+ class NoOneGame(Game):
+     """
+     This class specifies the base Game class. To define your own game, subclass
+     this class and implement the functions below. This works when the game is
+     two-player, adversarial and turn-based.
+
+     Use 1 for player1 and -1 for player2.
+
+     See othello/OthelloGame.py for an example implementation.
+     """
+
+     square_content = {
+         -1: "X",
+         +0: "-",
+         +1: "O"
+     }
+     def __init__(self, n):
+         self.n = n
+         self.steps = 0
+
+     def reach_max_steps(self):
+         return self.steps >= 500
+
+     def reset_steps(self):
+         self.steps = 0
+
+     def getInitBoard(self):
+         """
+         Returns:
+             startBoard: a representation of the board (ideally this is the form
+                         that will be the input to your neural network)
+         """
+         b = Board(self.n)
+         return np.array(b.pieces)
+
+     def getBoardSize(self):
+         """
+         Returns:
+             (x,y): a tuple of board dimensions
+         """
+         return (self.n, self.n)
+
+     def getActionSize(self):
+         """
+         Returns:
+             actionSize: number of all possible actions
+         """
+         return self.n * self.n * 4
+
+     def decodeAction(self, action):
+         i = action // (self.n * self.n)
+         x = (action % (self.n * self.n)) % self.n
+         y = (action % (self.n * self.n)) // self.n
+         return ([x, y], i)
+
+     def encodeAction(self, pos, i):
+         return self.n*self.n*i + self.n*pos[1] + pos[0]
+
+     def getNextState(self, board, player, action):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+             action: action taken by current player
+
+         Returns:
+             nextBoard: board after applying action
+             nextPlayer: player who plays in the next turn (should be -player)
+         """
+
+         (pos, i) = self.decodeAction(action)
+
+         b = Board(self.n)
+         b.pieces = np.copy(board)
+         move = (pos, i)
+         b.execute_move(move, player)
+         # self.display(b.pieces)
+         self.steps += 1
+         return (b.pieces, -player)
+
+     def getValidMoves(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player
+
+         Returns:
+             validMoves: a binary vector of length self.getActionSize(), 1 for
+                         moves that are valid from the current board and player,
+                         0 for invalid moves
+         """
+         valids = [0]*self.getActionSize()
+         b = Board(self.n)
+         b.pieces = np.copy(board)
+         legalMoves = b.get_legal_moves(player)
+         for (pos, i) in legalMoves:
+             valids[self.encodeAction(pos, i)] = 1
+         return np.array(valids)
+
+     def getGameEnded(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+
+         Returns:
+             r: 0 if game has not ended. 1 if player won, -1 if player lost,
+                small non-zero value for a draw.
+
+         """
+         b = Board(self.n)
+         b.pieces = np.copy(board)
+
+         # for the early stage of training
+         # if b.count(player) == b.count(-player):
+         #     return 0
+         # return 1 if b.count(player) > b.count(-player) else -1
+
+         if b.count(player) <= 1:
+             return -1
+         if b.count(-player) <= 1:
+             return 1
+
+         # if self.reach_max_steps():
+         #     print("reach max steps")
+         #     self.steps = 0
+         #     if b.count(player) == b.count(-player):
+         #         return 0.01
+         #     return 1 if b.count(player) > b.count(-player) else -1
+         return 0
+
+     def getCanonicalForm(self, board, player):
+         """
+         Input:
+             board: current board
+             player: current player (1 or -1)
+
+         Returns:
+             canonicalBoard: returns canonical form of board. The canonical form
+                             should be independent of player. For example, in chess,
+                             the canonical form can be chosen to be from the pov
+                             of white. When the player is white, we can return
+                             board as is. When the player is black, we can invert
+                             the colors and return the board.
+         """
+         return player*board
+
+     def getSymmetries(self, board, pi):
+         """
+         Input:
+             board: current board
+             pi: policy vector of size self.getActionSize()
+
+         Returns:
+             symmForms: a list of [(board,pi)] where each tuple is a symmetrical
+                        form of the board and the corresponding pi vector. This
+                        is used when training the neural network from examples.
+         """
+         assert(len(pi) == self.n**2*4)  # 4 directions per square
+         pi_board = np.reshape(pi, (4, self.n, self.n))
+         l = []
+
+         for i in range(1, 5):
+             for j in [True, False]:
+                 newB = np.rot90(board, i)
+                 newPi = np.rot90(pi_board, i)
+                 if j:
+                     newB = np.fliplr(newB)
+                     newPi = np.fliplr(newPi)
+                 l += [(newB, list(newPi.ravel()))]
+         return l
+
+     def stringRepresentation(self, board):
+         """
+         Input:
+             board: current board
+
+         Returns:
+             boardString: a quick conversion of board to a string format.
+                          Required by MCTS for hashing.
+         """
+         return board.tostring()
+
+     @staticmethod
+     def display(board):
+         n = board.shape[0]
+         print(" ", end="")
+         for y in range(n):
+             print(y, end=" ")
+         print("")
+         print("-----------------------")
+         for y in range(n):
+             print(y, "|", end="")    # print the row number
+             for x in range(n):
+                 piece = board[y][x]  # get the piece to print
+                 print(NoOneGame.square_content[piece], end=" ")
+             print("|")
+
+         print("-----------------------")
no_one/NoOneLogic.py ADDED
@@ -0,0 +1,107 @@
+ class Board:
+     """
+     (1 for white, -1 for black, 0 for empty spaces)"""
+
+     __directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
+     __readable_directions = ['down', 'right', 'up', 'left']
+
+     @staticmethod
+     def readableDirection(i):
+         return Board.__readable_directions[i]
+
+     def __init__(self, n):
+         self.n = n
+         self.steps = 0
+         self.max_steps = 1000
+         self.pieces = [[0 for _ in range(n)] for _ in range(n)]
+         # setup the initial board
+         for i in range(n):
+             self.pieces[0][i] = 1
+             self.pieces[-1][i] = -1
+
+     def count(self, color):
+         count = 0
+         for x in range(self.n):
+             for y in range(self.n):
+                 if self.pieces[x][y] == color:
+                     count += 1
+         return count
+
+     def reach_max_steps(self):
+         print("steps:", self.steps)
+         return self.steps >= self.max_steps
+
+     def get_legal_moves(self, color):
+         """Return all the legal moves for the given color.
+         """
+         moves = set()
+         for i in range(self.n):
+             for j in range(self.n):
+                 if self.pieces[i][j] == color:
+                     newmoves = self.get_legal_moves_from((i, j))
+                     for m in newmoves:
+                         moves.add(((i, j), m))
+         return list(moves)
+
+     def get_legal_moves_from(self, p):
+         moves = set()
+         for i, direction in enumerate(self.__directions):
+             next_p = self.next_position(p, direction)
+             if next_p is None:
+                 continue
+             if self.pieces[next_p[0]][next_p[1]] == 0:
+                 moves.add(i)
+         return list(moves)
+
+     def execute_move(self, move, color):
+         self.steps += 1
+         (i, j), direction = move
+         (x, y) = self.next_position((i, j), self.__directions[direction])
+         if self.pieces[x][y] != 0:
+             raise ValueError(f'Invalid move: {move}')
+         self.pieces[i][j] = 0
+         self.pieces[x][y] = color
+         self.capture_pieces((x, y), color)
+
+         if len(self.get_legal_moves(-color)) == 0:
+             self.remove_all(-color)
+
+     def remove_all(self, color):
+         for i in range(self.n):
+             for j in range(self.n):
+                 if self.pieces[i][j] == color:
+                     self.pieces[i][j] = 0
+
+     def capture_pieces(self, pos, color):
+         """
+         pos: current position of the piece that just moved"""
+         for d in self.__directions:
+             npos = self.next_position(pos, d)
+             if npos is None:
+                 continue
+             (x, y) = npos
+             if self.pieces[x][y] == color:
+                 # two pieces of the same color form a connected pair
+                 self.capture_piece(npos, d, color)
+                 self.capture_piece(pos, (-d[0], -d[1]), color)
+
+     def capture_piece(self, pos, direction, color):
+         """Looking from pos along direction: if there is exactly one opposing
+         piece next to it, that piece is captured; if there are two opposing
+         pieces in a row, nothing is captured."""
+         np = self.next_position(pos, direction)
+         if np is None:
+             return
+         if self.pieces[np[0]][np[1]] != -color:
+             return
+         nnp = self.next_position(np, direction)
+         if nnp is not None:
+             if self.pieces[nnp[0]][nnp[1]] == -color:
+                 return
+         self.pieces[np[0]][np[1]] = 0
+
+     def next_position(self, pos, direction):
+         x, y = pos[0] + direction[0], pos[1] + direction[1]
+         if x < 0 or x >= self.n or y < 0 or y >= self.n:
+             return None
+         return (x, y)
+
+
no_one/NoOnePlayes.py ADDED
@@ -0,0 +1,42 @@
+ import numpy as np
+
+ from .NoOneLogic import Board
+
+ class RandomPlayer():
+     def __init__(self, game):
+         self.game = game
+
+     def play(self, board):
+         a = np.random.randint(self.game.getActionSize())
+         valids = self.game.getValidMoves(board, 1)
+         while valids[a] != 1:
+             a = np.random.randint(self.game.getActionSize())
+         return a
+
+
+ class HumanPlayer():
+     def __init__(self, game):
+         self.game = game
+
+     def play(self, board):
+         # display(board)
+         valid = self.game.getValidMoves(board, 1)
+         for i in range(len(valid)):
+             if valid[i]:
+                 (pos, di) = self.game.decodeAction(i)
+                 print("[", pos[0], pos[1], "%s(%s)" % (Board.readableDirection(di), di), end="] ")
+         while True:
+             input_move = input()
+             input_a = input_move.split(" ")
+             if len(input_a) == 3:
+                 try:
+                     x, y, di = [int(i) for i in input_a]
+                     if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n) and (0 <= di) and (di < 4)):
+                         a = self.game.encodeAction([x, y], di)
+                         if valid[a]:
+                             break
+                 except ValueError:
+                     # input needs to be three integers
+                     print('Invalid integer')
+             print('Invalid move')
+         return a
no_one/__init__.py ADDED
File without changes
no_one/pytorch/NNet.py ADDED
@@ -0,0 +1,120 @@
+ import os
+ import sys
+ import time
+
+ import numpy as np
+ from tqdm import tqdm
+
+ sys.path.append('../../')
+ from utils import *
+ from NeuralNet import NeuralNet
+
+ import torch
+ import torch.optim as optim
+
+ from .NoOneNNet import NoOneNNet as onnet
+
+ args = dotdict({
+     'lr': 0.001,
+     'dropout': 0.3,
+     'epochs': 10,
+     'batch_size': 64,
+     'cuda': torch.cuda.is_available(),
+     'num_channels': 512,
+ })
+
+
+ class NNetWrapper(NeuralNet):
+     def __init__(self, game):
+         self.nnet = onnet(game, args)
+         self.board_x, self.board_y = game.getBoardSize()
+         self.action_size = game.getActionSize()
+
+         if args.cuda:
+             self.nnet.cuda()
+
+     def train(self, examples):
+         """
+         examples: list of examples, each example is of form (board, pi, v)
+         """
+         optimizer = optim.Adam(self.nnet.parameters())
+
+         for epoch in range(args.epochs):
+             print('EPOCH ::: ' + str(epoch + 1))
+             self.nnet.train()
+             pi_losses = AverageMeter()
+             v_losses = AverageMeter()
+
+             batch_count = int(len(examples) / args.batch_size)
+
+             t = tqdm(range(batch_count), desc='Training Net')
+             for _ in t:
+                 sample_ids = np.random.randint(len(examples), size=args.batch_size)
+                 boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
+                 boards = torch.FloatTensor(np.array(boards).astype(np.float64))
+                 target_pis = torch.FloatTensor(np.array(pis))
+                 target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))
+
+                 # predict
+                 if args.cuda:
+                     boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
+
+                 # compute output
+                 out_pi, out_v = self.nnet(boards)
+                 l_pi = self.loss_pi(target_pis, out_pi)
+                 l_v = self.loss_v(target_vs, out_v)
+                 total_loss = l_pi + l_v
+
+                 # record loss
+                 pi_losses.update(l_pi.item(), boards.size(0))
+                 v_losses.update(l_v.item(), boards.size(0))
+                 t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)
+
+                 # compute gradient and do SGD step
+                 optimizer.zero_grad()
+                 total_loss.backward()
+                 optimizer.step()
+
+     def predict(self, board):
+         """
+         board: np array with board
+         """
+         # timing
+         start = time.time()
+
+         # preparing input
+         board = torch.FloatTensor(board.astype(np.float64))
+         if args.cuda: board = board.contiguous().cuda()
+         board = board.view(1, self.board_x, self.board_y)
+         self.nnet.eval()
+         with torch.no_grad():
+             pi, v = self.nnet(board)
+
+         # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))
+         return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0]
+
+     def loss_pi(self, targets, outputs):
+         return -torch.sum(targets * outputs) / targets.size()[0]
+
+     def loss_v(self, targets, outputs):
+         return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]
+
+     def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
+         filepath = os.path.join(folder, filename)
+         if not os.path.exists(folder):
+             print("Checkpoint Directory does not exist! Making directory {}".format(folder))
+             os.mkdir(folder)
+         else:
+             print("Checkpoint Directory exists! ")
+         torch.save({
+             'state_dict': self.nnet.state_dict(),
+         }, filepath)
+
+     def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
+         # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98
+         filepath = os.path.join(folder, filename)
+         if not os.path.exists(filepath):
+             raise FileNotFoundError("No model in path {}".format(filepath))
+         map_location = None if args.cuda else 'cpu'
+         checkpoint = torch.load(filepath, map_location=map_location)
+         self.nnet.load_state_dict(checkpoint['state_dict'])
no_one/pytorch/NoOneNNet.py ADDED
@@ -0,0 +1,54 @@
+ import sys
+ sys.path.append('..')
+ from utils import *
+
+ import argparse
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch.optim as optim
+
+ class NoOneNNet(nn.Module):
+     def __init__(self, game, args):
+         # game params
+         self.board_x, self.board_y = game.getBoardSize()
+         self.action_size = game.getActionSize()
+         self.args = args
+
+         super(NoOneNNet, self).__init__()
+         self.conv1 = nn.Conv2d(1, args.num_channels, 3, stride=1, padding=1)
+         self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
+         self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
+         self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)
+
+         self.bn1 = nn.BatchNorm2d(args.num_channels)
+         self.bn2 = nn.BatchNorm2d(args.num_channels)
+         self.bn3 = nn.BatchNorm2d(args.num_channels)
+         self.bn4 = nn.BatchNorm2d(args.num_channels)
+
+         self.fc1 = nn.Linear(args.num_channels*(self.board_x-2)*(self.board_y-2), 1024)
+         self.fc_bn1 = nn.BatchNorm1d(1024)
+
+         self.fc2 = nn.Linear(1024, 512)
+         self.fc_bn2 = nn.BatchNorm1d(512)
+
+         self.fc3 = nn.Linear(512, self.action_size)
+
+         self.fc4 = nn.Linear(512, 1)
+
+     def forward(self, s):
+         # s: batch_size x board_x x board_y
+         s = s.view(-1, 1, self.board_x, self.board_y)   # batch_size x 1 x board_x x board_y
+         s = F.relu(self.bn1(self.conv1(s)))             # batch_size x num_channels x board_x x board_y
+         s = F.relu(self.bn2(self.conv2(s)))             # batch_size x num_channels x board_x x board_y
+         s = F.relu(self.bn3(self.conv3(s)))             # batch_size x num_channels x board_x x board_y (padding=1 keeps the size)
+         s = F.relu(self.bn4(self.conv4(s)))             # batch_size x num_channels x (board_x-2) x (board_y-2) (no padding)
+         s = s.view(-1, self.args.num_channels*(self.board_x-2)*(self.board_y-2))
+
+         s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), p=self.args.dropout, training=self.training)  # batch_size x 1024
+         s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), p=self.args.dropout, training=self.training)  # batch_size x 512
+
+         pi = self.fc3(s)                                # batch_size x action_size
+         v = self.fc4(s)                                 # batch_size x 1
+
+         return F.log_softmax(pi, dim=1), torch.tanh(v)
no_one/pytorch/__init__.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,46 @@
+ absl-py==1.1.0
+ astunparse==1.6.3
+ cachetools==5.2.0
+ certifi==2022.5.18.1
+ charset-normalizer==2.0.12
+ coloredlogs==15.0.1
+ flatbuffers==1.12
+ gast==0.4.0
+ google-auth==2.8.0
+ google-auth-oauthlib==0.4.6
+ google-pasta==0.2.0
+ grpcio==1.46.3
+ h5py==3.7.0
+ humanfriendly==10.0
+ idna==3.3
+ importlib-metadata==4.11.4
+ keras==2.9.0
+ Keras-Preprocessing==1.1.2
+ libclang==14.0.1
+ Markdown==3.3.7
+ numpy
+ oauthlib==3.2.0
+ opt-einsum==3.3.0
+ packaging==21.3
+ protobuf==3.19.4
+ pyasn1==0.4.8
+ pyasn1-modules==0.2.8
+ pyparsing==3.0.9
+ requests==2.28.0
+ requests-oauthlib==1.3.1
+ rsa==4.8
+ six==1.16.0
+ tensorboard==2.9.1
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.1
+ tensorflow==2.9.1
+ tensorflow-estimator==2.9.0
+ tensorflow-io-gcs-filesystem==0.26.0
+ termcolor==1.1.0
+ torch==1.11.0
+ tqdm==4.64.0
+ typing_extensions==4.2.0
+ urllib3==1.26.9
+ Werkzeug==2.1.2
+ wrapt==1.14.1
+ zipp==3.8.0
utils.py ADDED
@@ -0,0 +1,22 @@
+ class AverageMeter(object):
+     """From https://github.com/pytorch/examples/blob/master/imagenet/main.py"""
+
+     def __init__(self):
+         self.val = 0
+         self.avg = 0
+         self.sum = 0
+         self.count = 0
+
+     def __repr__(self):
+         return f'{self.avg:.2e}'
+
+     def update(self, val, n=1):
+         self.val = val
+         self.sum += val * n
+         self.count += n
+         self.avg = self.sum / self.count
+
+
+ class dotdict(dict):
+     def __getattr__(self, name):
+         return self[name]
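
dotdict only adds attribute reads on top of a plain dict, which is how the hyperparameter bundles in no_one/pytorch/NNet.py and app.py use it:

    from utils import dotdict

    args = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
    print(args.numMCTSSims)   # 50 -- __getattr__ falls back to the dict lookup
    print(args['cpuct'])      # 1.0 -- normal dict access still works
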