Spaces:

nullne
/

no_one

Sleeping

App Files Files Community

nullne committed on Feb 22

Commit

eb34cec

•

1 Parent(s): 71b7125

Add application file

Browse files

it works

player settings

fix bug

refine the settings and add help doc

fix param

Files changed (18) hide show

.gitattributes +1 -0
Arena.py +102 -0
Game.py +113 -0
MCTS.py +256 -0
NeuralNet.py +50 -0
app.py +244 -0
models/firstBlood.pth.tar +3 -0
models/random.pth.tar +3 -0
models/super.pth.tar +3 -0
no_one/NoOneGame.py +206 -0
no_one/NoOneLogic.py +107 -0
no_one/NoOnePlayes.py +42 -0
no_one/__init__.py +0 -0
no_one/pytorch/NNet.py +120 -0
no_one/pytorch/NoOneNNet.py +54 -0
no_one/pytorch/__init__.py +0 -0
requirements.txt +46 -0
utils.py +22 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pth.tar filter=lfs diff=lfs merge=lfs -text

Arena.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import logging
+from tqdm import tqdm
+log = logging.getLogger(__name__)
+class Arena():
+    """
+    Pits two agents against each other on a given game.
+    """
+    def __init__(self, player1, player2, game, display=None):
+        """
+        Input:
+            player1, player2: callables that take a canonical board and
+                              return an action
+            game: Game object supplying the rules
+            display: optional callable that prints a board; it is only
+                     needed when a game is played with verbose=True
+        """
+        self.player1, self.player2 = player1, player2
+        self.game, self.display = game, display
+    def playGame(self, verbose=False):
+        """
+        Plays one game from the initial board with player1 moving first.
+        Returns:
+            0.01 as soon as more than 100 turns have been played without a
+            result; otherwise curPlayer * getGameEnded(board, curPlayer) for
+            the final position (1 if player1 won, -1 if player2 won, any
+            other value is a draw result coming from the game).
+        """
+        # side to move (1 or -1) -> agent; captured once at the start of the game
+        movers = {1: self.player1, -1: self.player2}
+        game = self.game
+        side = 1
+        state = game.getInitBoard()
+        turn = 0
+        while game.getGameEnded(state, side) == 0:
+            if turn > 100:
+                # give up on overly long games and report them as a draw
+                return 0.01
+            turn += 1
+            if verbose:
+                assert self.display
+                print("Turn ", str(turn), "Player ", str(side))
+                self.display(state)
+            chosen = movers[side](game.getCanonicalForm(state, side))
+            # legality is checked on the canonical board, hence player 1
+            legal = game.getValidMoves(game.getCanonicalForm(state, side), 1)
+            if legal[chosen] == 0:
+                log.error(f'Action {chosen} is not valid!')
+                log.debug(f'valids = {legal}')
+                assert legal[chosen] > 0
+            state, side = game.getNextState(state, side, chosen)
+        if verbose:
+            assert self.display
+            print("Game over: Turn ", str(turn), "Result ", str(game.getGameEnded(state, 1)))
+            self.display(state)
+        return side * game.getGameEnded(state, side)
+    def playGames(self, num, verbose=False):
+        """
+        Plays int(num / 2) games with player1 starting, swaps the two players,
+        then plays int(num / 2) more games.
+        Returns:
+            oneWon: games won by the agent originally passed as player1
+            twoWon: games won by the agent originally passed as player2
+            draws:  games with any other result
+        NOTE: self.player1 and self.player2 remain swapped after the call.
+        """
+        half = int(num / 2)
+        tally = {"one": 0, "two": 0, "draw": 0}
+        for _ in tqdm(range(half), desc="Arena.playGames (1)"):
+            outcome = self.playGame(verbose=verbose)
+            bucket = "one" if outcome == 1 else "two" if outcome == -1 else "draw"
+            tally[bucket] += 1
+        self.player1, self.player2 = self.player2, self.player1
+        for _ in tqdm(range(half), desc="Arena.playGames (2)"):
+            outcome = self.playGame(verbose=verbose)
+            # seats are swapped now, so a -1 result is a win for the original player1
+            bucket = "one" if outcome == -1 else "two" if outcome == 1 else "draw"
+            tally[bucket] += 1
+        return tally["one"], tally["two"], tally["draw"]

Game.py ADDED Viewed

	@@ -0,0 +1,113 @@

+class Game():
+    """
+    Abstract description of a two-player, adversarial, turn-based game.
+    Subclass it and override every method below; the versions here do nothing
+    and return None. Player 1 is represented by 1 and player 2 by -1.
+    See othello/OthelloGame.py for an example implementation.
+    """
+    def __init__(self):
+        """The base class keeps no state."""
+    def getInitBoard(self):
+        """
+        Returns:
+            startBoard: the starting position, ideally already in the form
+                        that is fed to the neural network
+        """
+    def getBoardSize(self):
+        """
+        Returns:
+            (x,y): the board dimensions as a tuple
+        """
+    def getActionSize(self):
+        """
+        Returns:
+            actionSize: how many distinct actions exist in total
+        """
+    def getNextState(self, board, player, action):
+        """
+        Input:
+            board: current board
+            player: the player making the move (1 or -1)
+            action: the action that player takes
+        Returns:
+            nextBoard: the board once the action has been applied
+            nextPlayer: the player to move next (should be -player)
+        """
+    def getValidMoves(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player
+        Returns:
+            validMoves: binary vector of length self.getActionSize() holding
+                        1 where a move is legal for this board and player
+                        and 0 where it is not
+        """
+    def getGameEnded(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player (1 or -1)
+        Returns:
+            r: 0 while the game is still running, 1 if player has won,
+               -1 if player has lost, a small non-zero value for a draw
+        """
+    def getCanonicalForm(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player (1 or -1)
+        Returns:
+            canonicalBoard: a player-independent view of the board. In chess,
+                            for instance, one could always present the board
+                            from white's point of view: unchanged when white
+                            is to move, colours inverted when black is.
+        """
+    def getSymmetries(self, board, pi):
+        """
+        Input:
+            board: current board
+            pi: policy vector of size self.getActionSize()
+        Returns:
+            symmForms: list of (board, pi) tuples, one per symmetrical form
+                       of the position with the matching policy vector. Used
+                       when training the neural network from examples.
+        """
+    def stringRepresentation(self, board):
+        """
+        Input:
+            board: current board
+        Returns:
+            boardString: a cheap conversion of the board to a string-like
+                         key; MCTS needs it for hashing.
+        """

MCTS.py ADDED Viewed

	@@ -0,0 +1,256 @@

+import logging
+import math
+import numpy as np
+# from no_one.NoOnePlayes import HumanPlayer
+EPS = 1e-8
+log = logging.getLogger(__name__)
+class MCTS():
+    """
+    This class handles the MCTS tree.
+    All statistics are stored in dictionaries keyed by
+    game.stringRepresentation(board), or by (that key, action) for edges.
+    """
+    def __init__(self, game, nnet, args):
+        """
+        Input:
+            game: game object; this class calls reset_steps,
+                  stringRepresentation, getActionSize, getGameEnded,
+                  getValidMoves, getNextState and getCanonicalForm on it
+            nnet: object whose predict(board) returns (policy vector, value)
+            args: object exposing the attributes numMCTSSims and cpuct
+        """
+        self.game = game
+        self.nnet = nnet
+        self.args = args
+        self.Qsa = {}  # stores Q values for s,a (as defined in the paper)
+        self.Nsa = {}  # stores #times edge s,a was visited
+        self.Ns = {}  # stores #times board s was visited
+        self.Ps = {}  # stores initial policy (returned by neural net)
+        self.Es = {}  # stores game.getGameEnded ended for board s
+        self.Vs = {}  # stores game.getValidMoves for board s
+    def getActionProb(self, canonicalBoard, temp=1):
+        """
+        This function performs numMCTSSims simulations of MCTS starting from
+        canonicalBoard.
+        Input:
+            canonicalBoard: board in canonical form to search from
+            temp: temperature; 0 returns a one-hot vector on a most-visited
+                  action (ties broken at random)
+        Returns:
+            probs: a policy vector where the probability of the ith action is
+                   proportional to Nsa[(s,a)]**(1./temp). If no edge below the
+                   root has been visited, the valid-move mask is used in place
+                   of the visit counts.
+        """
+        for _ in range(self.args.numMCTSSims):
+            # the game keeps its own step counter; restart it for every simulation
+            self.game.reset_steps()
+            self.search(canonicalBoard)
+        s = self.game.stringRepresentation(canonicalBoard)
+        counts = [self.Nsa.get((s, a), 0) for a in range(self.game.getActionSize())]
+        if sum(counts) == 0:
+            # Nothing below the root was visited (e.g. a single simulation only
+            # expands the root). Dividing by the zero total would raise, and an
+            # argmax over all-zero counts could return an invalid move, so weigh
+            # the valid moves equally instead.
+            valids = self.game.getValidMoves(canonicalBoard, 1)
+            counts = [1 if valid else 0 for valid in valids]
+            if sum(counts) == 0:
+                # no valid move either: fall back to a uniform vector
+                counts = [1] * len(counts)
+        if temp == 0:
+            bestAs = np.argwhere(np.asarray(counts) == np.max(counts)).flatten()
+            bestA = np.random.choice(bestAs)
+            probs = [0] * len(counts)
+            probs[bestA] = 1
+            return probs
+        counts = [x ** (1. / temp) for x in counts]
+        counts_sum = float(sum(counts))
+        probs = [x / counts_sum for x in counts]
+        return probs
+    def search_iterate(self, canonicalBoard):
+        """
+        Iterative counterpart of search(): performs one simulation using an
+        explicit stack instead of recursion (it has no depth-based random
+        move override).
+        Returns:
+            v: the negative of the value of canonicalBoard
+        """
+        # Stack entries are (tag, payload):
+        #   tag 0 -> descend into a board, payload is (board,)
+        #   tag 1 -> back up the child's value along an edge, payload is (s, a)
+        stack = [(0, (canonicalBoard,))]
+        results = []  # values produced by leaf/terminal nodes and by backups
+        while stack:
+            st, sv = stack.pop()
+            if st == 0:
+                result, ns = self.search_iterate_st0(sv[0])
+                if result is not None:
+                    results.append(result)
+                if ns is not None:
+                    # push the backup first so that it runs after the child
+                    stack.append((1, (ns[1], ns[2])))
+                    stack.append((0, (ns[0],)))
+            elif st == 1:
+                v = results.pop()
+                v = self.search_iterate_update(v, sv[0], sv[1])
+                results.append(v)
+            else:
+                raise ValueError("Invalid state")
+        return results.pop()
+    def search_iterate_st0(self, canonicalBoard):
+        """
+        Processes one node for search_iterate().
+        Returns:
+            (value, None) when the node is terminal or has just been expanded,
+            where value is the negated node value; otherwise
+            (None, (next_board, s, action)) describing the edge to follow.
+        """
+        s = self.game.stringRepresentation(canonicalBoard)
+        if s not in self.Es:
+            self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
+        if self.Es[s] != 0:
+            # terminal node
+            return -self.Es[s], None
+        if s not in self.Ps:
+            # leaf node
+            self.Ps[s], v = self.nnet.predict(canonicalBoard)
+            valids = self.game.getValidMoves(canonicalBoard, 1)
+            self.Ps[s] = self.Ps[s] * valids  # masking invalid moves
+            sum_Ps_s = np.sum(self.Ps[s])
+            if sum_Ps_s > 0:
+                self.Ps[s] /= sum_Ps_s  # renormalize
+            else:
+                # every valid move got zero prior: make them equally probable
+                self.Ps[s] = self.Ps[s] + valids
+                self.Ps[s] /= np.sum(self.Ps[s])
+            self.Vs[s] = valids
+            self.Ns[s] = 0
+            return -v, None
+        best_act = self._select_action(s)
+        next_s, next_player = self.game.getNextState(canonicalBoard, 1, best_act)
+        next_s = self.game.getCanonicalForm(next_s, next_player)
+        return None, (next_s, s, best_act)
+    def search_iterate_update(self, v, s, a):
+        """
+        Backs up value v along edge (s, a): updates the running mean Qsa and
+        the visit counters Nsa and Ns.
+        Returns:
+            -v, the value from the point of view of the parent node
+        """
+        if (s, a) in self.Qsa:
+            self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / (self.Nsa[(s, a)] + 1)
+            self.Nsa[(s, a)] += 1
+        else:
+            self.Qsa[(s, a)] = v
+            self.Nsa[(s, a)] = 1
+        self.Ns[s] += 1
+        return -v
+    def _select_action(self, s):
+        """
+        Returns the valid action with the highest upper confidence bound at the
+        already expanded node s, or -1 if no action is valid.
+        """
+        valids = self.Vs[s]
+        cur_best = -float('inf')
+        best_act = -1
+        for a in range(self.game.getActionSize()):
+            if valids[a]:
+                if (s, a) in self.Qsa:
+                    u = self.Qsa[(s, a)] + self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s]) / (
+                            1 + self.Nsa[(s, a)])
+                else:
+                    u = self.args.cpuct * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)  # Q = 0 ?
+                if u > cur_best:
+                    cur_best = u
+                    best_act = a
+        return best_act
+    def search(self, canonicalBoard, depth=0):
+        """
+        This function performs one iteration of MCTS. It is recursively called
+        till a leaf node is found. The action chosen at each node is one that
+        has the maximum upper confidence bound as in the paper.
+        Once a leaf node is found, the neural network is called to return an
+        initial policy P and a value v for the state. This value is propagated
+        up the search path. In case the leaf node is a terminal state, the
+        outcome is propagated up the search path. The values of Ns, Nsa, Qsa are
+        updated.
+        NOTE: the return values are the negative of the value of the current
+        state. This is done since v is in [-1,1] and if v is the value of a
+        state for the current player, then its value is -v for the other player.
+        Input:
+            canonicalBoard: board in canonical form
+            depth: recursion depth of this call; once it exceeds 100 a random
+                   valid move is played instead of the UCB choice
+        Returns:
+            v: the negative of the value of the current canonicalBoard
+        """
+        s = self.game.stringRepresentation(canonicalBoard)
+        if s not in self.Es:
+            self.Es[s] = self.game.getGameEnded(canonicalBoard, 1)
+        if self.Es[s] != 0:
+            # terminal node
+            return -self.Es[s]
+        if s not in self.Ps:
+            # leaf node
+            self.Ps[s], v = self.nnet.predict(canonicalBoard)
+            valids = self.game.getValidMoves(canonicalBoard, 1)
+            self.Ps[s] = self.Ps[s] * valids  # masking invalid moves
+            sum_Ps_s = np.sum(self.Ps[s])
+            if sum_Ps_s > 0:
+                self.Ps[s] /= sum_Ps_s  # renormalize
+            else:
+                # if all valid moves were masked make all valid moves equally probable
+                # NB! All valid moves may be masked if either your NNet architecture is insufficient or you've get overfitting or something else.
+                # If you have got dozens or hundreds of these messages you should pay attention to your NNet and/or training process.
+                log.error("All valid moves were masked, doing a workaround.")
+                self.Ps[s] = self.Ps[s] + valids
+                self.Ps[s] /= np.sum(self.Ps[s])
+            self.Vs[s] = valids
+            self.Ns[s] = 0
+            return -v
+        # pick the action with the highest upper confidence bound
+        a = self._select_action(s)
+        if depth > 100:
+            # Very deep line: replace the UCB choice with a random valid move and
+            # wind the depth counter back, so the override only fires again after
+            # roughly another 20 plies.
+            candidates = self.game.getValidMoves(canonicalBoard, 1)
+            a = np.random.choice([i for i, ok in enumerate(candidates) if ok == 1])
+            depth = 80
+        next_s, next_player = self.game.getNextState(canonicalBoard, 1, a)
+        next_s = self.game.getCanonicalForm(next_s, next_player)
+        v = self.search(next_s, depth=depth + 1)
+        return self.search_iterate_update(v, s, a)

NeuralNet.py ADDED Viewed

	@@ -0,0 +1,50 @@

+class NeuralNet():
+    """
+    Abstract interface of the neural network. Subclass it and override the
+    methods below; the versions here do nothing and return None. The network
+    never looks at the current player: it only ever receives boards in
+    canonical form.
+    See othello/NNet.py for an example implementation.
+    """
+    def __init__(self, game):
+        """The base class stores nothing; game is ignored here."""
+    def train(self, examples):
+        """
+        Trains the network on examples gathered from self-play.
+        Input:
+            examples: list of training examples of the form (board, pi, v),
+                      where board is in canonical form, pi is the
+                      MCTS-informed policy vector for that board and v is
+                      its value.
+        """
+    def predict(self, board):
+        """
+        Input:
+            board: current board in its canonical form.
+        Returns:
+            pi: policy vector for the board, a numpy array of length
+                game.getActionSize
+            v: a float in [-1,1], the value of the board
+        """
+    def save_checkpoint(self, folder, filename):
+        """
+        Stores the current network (including its parameters) in
+        folder/filename
+        """
+    def load_checkpoint(self, folder, filename):
+        """
+        Restores the network parameters from folder/filename
+        """

app.py ADDED Viewed

	@@ -0,0 +1,244 @@

+import streamlit as st
+import numpy as np
+import Arena
+from MCTS import MCTS
+from no_one.NoOneGame import NoOneGame
+from no_one.pytorch.NNet import NNetWrapper as NNet
+from utils import *
+import time
+# Glyph shown on a board button for each cell value.
+square_content = {
+    -1: "❌",
+    +0: "·",
+    +1: "⭕"
+}
+# Display names of the two sides, indexed like the player settings (0 = ❌, 1 = ⭕).
+players = [{"name": "❌"}, {"name": "⭕"}]
+# Options of the player-type radio: index 0 is the AI, index 1 the human.
+player_types = [":rainbow[AI]", "Human"]
+# AI strength labels offered in the settings.
+ai_ranks = ["Super", "First Blood", "Random"]
+# Checkpoint file name for each AI rank.
+ai_models = {
+    "Super": "super.pth.tar",
+    "First Blood": "firstBlood.pth.tar",
+    "Random": "random.pth.tar",
+}
+# Single game instance shared by the functions of this module.
+game = NoOneGame(4)
+def check_clicked(i, j):
+    """Tells whether square (i, j) is the currently selected one."""
+    return bool(st.session_state.clicked == (i, j))
+class HumanPlayer:
+    """Turns a pair of clicked squares into a game action."""
+    # (src - target) offset -> direction index handed to game.encodeAction
+    _OFFSET_TO_DIRECTION = {(0, -1): 1, (0, 1): 3, (-1, 0): 0, (1, 0): 2}
+    def __init__(self, game):
+        self.game = game
+    def encode(self, src, target):
+        """
+        Returns the encoded action for moving from src to target, or None
+        when target is not exactly one step up, down, left or right of src.
+        """
+        offset = (src[0] - target[0], src[1] - target[1])
+        direction = self._OFFSET_TO_DIRECTION.get(offset)
+        if direction is None:
+            return None
+        return self.game.encodeAction(src, direction)
+    def play(self, board, player, src, target):
+        """
+        Returns the action for moving from src to target if the game reports
+        it as valid for player on board, otherwise None.
+        """
+        action = self.encode(src, target)
+        if action is None:
+            return None
+        if self.game.getValidMoves(board, player)[action] != 1:
+            return None
+        return action
+def ai_player(ai_model):
+    """
+    Builds an AI agent for the given rank name: loads the matching checkpoint
+    and returns a callable mapping a canonical board to the action chosen by
+    a 50-simulation MCTS at temperature 0.
+    """
+    net = NNet(game)
+    net.load_checkpoint('./models/', ai_models[ai_model])
+    search_args = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
+    tree = MCTS(game, net, search_args)
+    def choose(board):
+        return np.argmax(tree.getActionProb(board, temp=0))
+    return choose
+def human_step(i, j):
+    """
+    Click handler of board square (i, j): selects or deselects one of the
+    current player's pieces, or tries to move the selected piece there.
+    """
+    state = st.session_state
+    cell = (i, j)
+    selected = state.clicked
+    if selected is None:
+        # nothing selected yet: only one of the mover's own pieces may be picked
+        if state.board[i, j] != state.player:
+            st.toast('Invalid move!')
+            return
+        state.clicked = cell
+        return
+    if selected == cell:
+        # clicking the selected square again cancels the selection
+        state.clicked = None
+        return
+    if state.board[i, j] == state.player:
+        # another own piece: switch the selection to it
+        state.clicked = cell
+        return
+    # any other square is treated as the destination of a move
+    mover = state.players[state.player + 1]
+    action = mover.play(state.board, state.player, selected, cell)
+    if action is None:
+        st.toast('Invalid move!')
+        return
+    move(action)
+    state.clicked = None
+def ai_step():
+    """Asks the agent of the side to move for an action and plays it."""
+    side = st.session_state.player
+    agent = st.session_state.players[side + 1]
+    canonical = game.getCanonicalForm(st.session_state.board, side)
+    move(agent(canonical))
+def move(action):
+    """
+    Applies action for the side to move, stores the new board and the next
+    player, and records the winner and score if the game has ended.
+    """
+    state = st.session_state
+    mover = state.player
+    state.board, state.player = game.getNextState(state.board, mover, action)
+    outcome = game.getGameEnded(state.board, mover)
+    if outcome == 0:
+        return
+    # outcome is relative to the mover; multiplying by mover gives the winning side
+    state.winner = outcome * mover
+    state.win[outcome * mover] += 1
+    st.balloons()
+def reinit():
+    """Sets the session flag that requests a fresh game."""
+    st.session_state.reinit = True
+def init(post_init=False):
+    """
+    Starts a new game in the session state: fresh board, ❌ (-1) to move,
+    players rebuilt from the player settings, no winner and no selection.
+    The score is reset as well unless post_init is true.
+    """
+    state = st.session_state
+    state.reinit = False
+    if not post_init:
+        state.win = {-1: 0, 1: 0}
+    state.board = game.getInitBoard()
+    state.player = -1
+    seats = [
+        HumanPlayer(game) if cfg["pt"] == player_types[1] else ai_player(cfg["ai_model"])
+        for cfg in state.player_settings
+    ]
+    # pad the middle so that seats[player + 1] works for player in (-1, 1)
+    seats.insert(1, None)
+    state.players = seats
+    state.winner = None
+    state.clicked = None
+def player_to_index(p):
+    """Maps player -1 to index 0 and player 1 to index 1; anything else gives None."""
+    return 0 if p == -1 else (1 if p == 1 else None)
+def main():
+    """
+    Renders the page: settings, optional help sidebar, player headers, the
+    board as a grid of buttons and the score. If the side to move is not a
+    human and the game has no winner yet, one AI move is played before the
+    board is drawn.
+    """
+    # Defaults when the session has no settings yet: ❌ is a human, ⭕ is the "First Blood" AI.
+    if "player_settings" not in st.session_state:
+        st.session_state.player_settings = [
+            {"pt": player_types[1], "ai_model": ai_ranks[0]},
+            {"pt": player_types[0], "ai_model": ai_ranks[1]},
+        ]
+    if "reinit" not in st.session_state:
+        st.session_state.reinit = False
+    fire, settings = st.columns([1, 2])
+    # "New Game" calls init(True), i.e. with post_init set, which keeps the score.
+    fire.button('New Game', on_click=init, args=(True,))
+    with settings.expander(
+        'Settings',
+        expanded=False,
+    ):
+        st.warning('Any setting changing will restart the game immediately', icon="⚠️")
+        # One settings column per side; every change calls reinit().
+        for i, p in enumerate(st.columns([0.5, 0.5])):
+            with p:
+                # players[i]
+                st.session_state.player_settings[i]["pt"] = st.radio(
+                    f"Who will play %s" % players[i]["name"],
+                    player_types,
+                    key=f"xp_type_{i}",
+                    horizontal=True,
+                    index=player_types.index(st.session_state.player_settings[i]["pt"]),
+                    on_change=reinit,
+                )
+                # The rank selector is only shown for an AI-controlled side.
+                if st.session_state.player_settings[i]["pt"] == player_types[0]:
+                    st.session_state.player_settings[i]["ai_model"] = st.radio(
+                        "AI rank",
+                        ai_ranks,
+                        key=f"xp_rank_{i}",
+                        index=ai_ranks.index(st.session_state.player_settings[i]["ai_model"]),
+                        on_change=reinit,
+                    )
+        st.divider()
+        st.checkbox("Show me how to play", key="show_how_to_play")
+    # First run, or a setting was changed: start over (init() without arguments also resets the score).
+    if "board" not in st.session_state or st.session_state.reinit:
+        init()
+    if st.session_state.show_how_to_play:
+        with st.sidebar:
+            st.title("How to play")
+            # Help text (Chinese): goal, movement and capture rules, and a note
+            # about the turn button when AI plays AI.
+            how_to = '''1. 游戏的目标是吃掉对方的棋子
+  开局双方各有四枚棋子，被吃剩一枚棋子即可判负
+2. 棋子可以向上下左右移动到空白位置
+3. 如何吃子
+    1. 移动后的棋子需要跟本方其他某个棋子，在水平方向，或者垂直方向上连住
+    2. 如果连住后的棋子两端有对方一个棋子，那么这个对方的棋子就被吃掉
+    3. 如果对方也有两颗棋子，则互相不吃
+4. 如果落子在对方连起来的两枚棋子一端，也不会被吃
+5. 如果选择 AI vs AI，需要手动点击比分下面的回合按钮，触发下一次落子
+'''
+            st.markdown(how_to)
+    # Header row: ❌ on the left, score in the middle, ⭕ on the right.
+    xp, score, op = st.columns([2, 8, 2])
+    for i, p in enumerate([xp, op]):
+        p.title(players[i]["name"], anchor=False)
+        caption = st.session_state.player_settings[i]["pt"]
+        # For an AI side the caption is prefixed with its rank.
+        if st.session_state.player_settings[i]["pt"] == player_types[0]:
+            caption = st.session_state.player_settings[i]["ai_model"] + " " + caption
+        p.caption(caption)
+    # st.session_state
+    # Play one AI move per run of main() when it is an AI's turn and nobody has won.
+    if st.session_state.player_settings[player_to_index(st.session_state.player)]['pt'] != "Human" and st.session_state.winner is None:
+        ai_step()
+    st.divider()
+    # One row of buttons per board row; the wide outer columns only pad the grid.
+    # NOTE(review): the layout has four inner columns, so it assumes a board at most 4 wide.
+    for i, row in enumerate(st.session_state.board):
+        cols = st.columns([5, 1, 1, 1, 1, 5])
+        for j, field in enumerate(row):
+            cols[j + 1].button(
+                square_content[field],
+                key=f"{i}-{j}",
+                type=f'{"primary" if check_clicked(i, j) else "secondary"}',
+                on_click=human_step,
+                args=(i, j),
+            )
+    s = score.columns([2, 2, 2])
+    s[1].title(f'{st.session_state.win[-1]} : {st.session_state.win[1]}', anchor=False)
+    # This button has no callback of its own; the help text describes it as the
+    # way to trigger the next move when AI plays AI.
+    s[1].button(
+        f'{"❌" if st.session_state.player == -1 else "⭕"}\'s turn'
+        if not st.session_state.winner
+        else f'🏁 Game finished'
+    )
+if __name__ == '__main__':
+    main()

models/firstBlood.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:944608ae57a59c24f3c5ab6ed4cb7f387903154bf97d6a57cbf207747a55f304
+size 39033691

models/random.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ac3537ec1644bf08ae41488d7a98fdcf1637ff6009de01d36b1edb4cc0e8c53
+size 39033691

models/super.pth.tar ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e24aa62ce56a68e74d78593e487d38a4a6a65917b05f8a684a33274072edf4a0
+size 39034331

no_one/NoOneGame.py ADDED Viewed

	@@ -0,0 +1,206 @@

+from __future__ import print_function
+import sys
+sys.path.append('..')
+from Game import Game
+from .NoOneLogic import Board
+import numpy as np
+class NoOneGame(Game):
+    """
+    This class specifies the base Game class. To define your own game, subclass
+    this class and implement the functions below. This works when the game is
+    two-player, adversarial and turn-based.
+    Use 1 for player1 and -1 for player2.
+    See othello/OthelloGame.py for an example implementation.
+    """
+    square_content = {
+        -1: "X",
+        +0: "-",
+        +1: "O"
+    }
+    def __init__(self, n):
+        self.n = n
+        self.steps = 0
+    def reach_max_steps(self):
+        return self.steps >= 500
+    def reset_steps(self):
+        self.steps = 0
+    def getInitBoard(self):
+        """
+        Returns:
+            startBoard: a representation of the board (ideally this is the form
+                        that will be the input to your neural network)
+        """
+        b = Board(self.n)
+        return np.array(b.pieces)
+    def getBoardSize(self):
+        """
+        Returns:
+            (x,y): a tuple of board dimensions
+        """
+        return (self.n, self.n)
+    def getActionSize(self):
+        """
+        Returns:
+            actionSize: number of all possible actions
+        """
+        return self.n * self.n * 4
+    def decodeAction(self, action):
+        i = action // (self.n * self.n)
+        x = (action % (self.n * self.n)) % self.n
+        y = (action % (self.n * self.n)) // self.n
+        return ([x, y], i)
+    def encodeAction(self, pos, i):
+        return self.n*self.n*i + self.n*pos[1] + pos[0]
+    def getNextState(self, board, player, action):
+        """
+        Input:
+            board: current board
+            player: current player (1 or -1)
+            action: action taken by current player
+        Returns:
+            nextBoard: board after applying action
+            nextPlayer: player who plays in the next turn (should be -player)
+        """
+        (pos, i) = self.decodeAction(action)
+        b = Board(self.n)
+        b.pieces = np.copy(board)
+        move = (pos, i)
+        b.execute_move(move, player)
+        # self.display(b.pieces)
+        self.steps += 1
+        return (b.pieces, -player)
+    def getValidMoves(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player
+        Returns:
+            validMoves: a binary vector of length self.getActionSize(), 1 for
+                        moves that are valid from the current board and player,
+                        0 for invalid moves
+        """
+        valids = [0]*self.getActionSize()
+        b = Board(self.n)
+        b.pieces = np.copy(board)
+        legalMoves = b.get_legal_moves(player)
+        for (pos, i) in legalMoves:
+            valids[self.encodeAction(pos, i)] = 1
+        return np.array(valids)
+    def getGameEnded(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player (1 or -1)
+        Returns:
+            r: 0 if game has not ended. 1 if player won, -1 if player lost,
+               small non-zero value for draw.
+        """
+        b = Board(self.n)
+        b.pieces = np.copy(board)
+        # for the early stage of training
+        # if b.count(player) == b.count(-player):
+        #     return 0
+        # return 1 if b.count(player) > b.count(-player) else -1
+        if b.count(player) <= 1:
+            return -1
+        if b.count(-player) <= 1:
+            return 1
+        # if self.reach_max_steps():
+        #     print("reach max steps")
+        #     self.steps = 0
+        #     if b.count(player) == b.count(-player):
+        #         return 0.01
+        #     return 1 if b.count(player) > b.count(-player) else -1
+        return 0
+    def getCanonicalForm(self, board, player):
+        """
+        Input:
+            board: current board
+            player: current player (1 or -1)
+        Returns:
+            canonicalBoard: returns canonical form of board. The canonical form
+                            should be independent of player. For e.g. in chess,
+                            the canonical form can be chosen to be from the pov
+                            of white. When the player is white, we can return
+                            board as is. When the player is black, we can invert
+                            the colors and return the board.
+        """
+        return player*board
+    def getSymmetries(self, board, pi):
+        """
+        Input:
+            board: current board
+            pi: policy vector of size self.getActionSize()
+        Returns:
+            symmForms: a list of [(board,pi)] where each tuple is a symmetrical
+                       form of the board and the corresponding pi vector. This
+                       is used when training the neural network from examples.
+                       Eight entries are produced: four rotations, each with
+                       and without a left-right flip.
+        """
+        assert(len(pi) == self.n**2*4)  # 4 entries per square; no extra slot for a pass move
+        # View the flat policy as a (4, n, n) array.
+        # NOTE(review): this assumes the flat index is laid out as
+        # direction-major (direction, row, col) -- confirm against encodeAction.
+        pi_board = np.reshape(pi, (4, self.n, self.n))
+        l = []
+        for i in range(1, 5):
+            for j in [True, False]:
+                newB = np.rot90(board, i)
+                # NOTE(review): np.rot90 rotates axes (0, 1) by default. For the
+                # (4, n, n) pi_board those are the size-4 axis and the first
+                # board axis, not the two board axes, and the direction index
+                # itself is not permuted. The rotated pi probably does not
+                # correspond to the rotated board -- verify before training.
+                newPi = np.rot90(pi_board, i)
+                if j:
+                    newB = np.fliplr(newB)
+                    # NOTE(review): np.fliplr reverses axis 1, which is the
+                    # column axis of the 2-D board but not of the 3-D pi_board.
+                    newPi = np.fliplr(newPi)
+                l += [(newB, list(newPi.ravel()))]
+        return l
+    def stringRepresentation(self, board):
+        """
+        Input:
+            board: current board
+        Returns:
+            boardString: a quick conversion of board to a string format.
+                         Required by MCTS for hashing.
+        """
+        return board.tostring()
+    @staticmethod
+    def display(board):
+        n = board.shape[0]
+        print("   ", end="")
+        for y in range(n):
+            print(y, end=" ")
+        print("")
+        print("-----------------------")
+        for y in range(n):
+            print(y, "|", end="")    # print the row #
+            for x in range(n):
+                piece = board[y][x]    # get the piece to print
+                print(NoOneGame.square_content[piece], end=" ")
+            print("|")
+        print("-----------------------")

no_one/NoOneLogic.py ADDED Viewed

	@@ -0,0 +1,107 @@

+class Board:
+    """
+    (1 for white, -1 for black, 0 for empty spaces)"""
+    __directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
+    __readable_directions = ['down', 'right', 'up', 'left']
+    @staticmethod
+    def readableDirection(i):
+        return Board.__readable_directions[i]
+    def __init__(self, n):
+        self.n = n
+        self.steps = 0
+        self.max_steps = 1000
+        self.pieces = [[0 for _ in range(n)] for _ in range(n)]
+        # setup the initial board
+        for i in range(n):
+            self.pieces[0][i] = 1
+            self.pieces[-1][i] = -1
+    def count(self, color):
+        count = 0
+        for x in range(self.n):
+            for y in range(self.n):
+                if self.pieces[x][y] == color:
+                    count += 1
+        return count
+    def reach_max_steps(self):
+        print("steps:", self.steps)
+        return self.steps >= self.max_steps
+    def get_legal_moves(self, color):
+        """Return all the legal moves for the given color.
+        """
+        moves = set()
+        for i in range(self.n):
+            for j in range(self.n):
+                if self.pieces[i][j] == color:
+                    newmoves = self.get_legal_moves_from((i, j))
+                    for m in newmoves:
+                        moves.add(((i, j), m))
+        return list(moves)
+    def get_legal_moves_from(self, p):
+        moves = set()
+        for i, direction in enumerate(self.__directions):
+            next_p = self.next_position(p, direction)
+            if next_p is None:
+                continue
+            if self.pieces[next_p[0]][next_p[1]] == 0:
+                moves.add(i)
+        return list(moves)
+    def execute_move(self, move, color):
+        self.steps += 1
+        (i, j), direction = move
+        (x, y) = self.next_position((i, j), self.__directions[direction])
+        if self.pieces[x][y] != 0:
+            raise ValueError(f'Invalid move: {move}')
+        self.pieces[i][j] = 0
+        self.pieces[x][y] = color
+        self.capture_pieces((x, y), color)
+        if len(self.get_legal_moves(-color)) == 0:
+            self.remove_all(-color)
+    def remove_all(self, color):
+        for i in range(self.n):
+            for j in range(self.n):
+                if self.pieces[i][j] == color:
+                    self.pieces[i][j] = 0
+    def capture_pieces(self, pos, color):
+        """
+        pos: current position of piece moved"""
+        for d in self.__directions:
+            npos = self.next_position(pos, d)
+            if npos is None:
+                continue
+            (x, y) = npos
+            if self.pieces[x][y] == color:
+                # two pieces of the same color
+                self.capture_piece(npos, d, color)
+                self.capture_piece(pos, (-d[0], -d[1]), color)
+    def capture_piece(self, pos, direction, color):
+        """pos所在位置，向着direction方向上，
+        如果有一个对面的棋子，就可以吃掉，如果有两个就不能"""
+        np = self.next_position(pos, direction)
+        if np is None:
+            return
+        if self.pieces[np[0]][np[1]] != -color:
+            return
+        nnp = self.next_position(np, direction)
+        if nnp is not None:
+            if self.pieces[nnp[0]][nnp[1]] == -color:
+                return
+        self.pieces[np[0]][np[1]] = 0
+    def next_position(self, pos, direction):
+        x, y = pos[0] + direction[0], pos[1] + direction[1]
+        if x < 0 or x >= self.n or y < 0 or y >= self.n:
+            return None
+        return (x, y)

no_one/NoOnePlayes.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import numpy as np
+from .NoOneLogic import Board
+class RandomPlayer():
+    def __init__(self, game):
+        self.game = game
+    def play(self, board):
+        a = np.random.randint(self.game.getActionSize())
+        valids = self.game.getValidMoves(board, 1)
+        while valids[a]!=1:
+            a = np.random.randint(self.game.getActionSize())
+        return a
+class HumanPlayer():
+    def __init__(self, game):
+        self.game = game
+    def play(self, board):
+        # display(board)
+        valid = self.game.getValidMoves(board, 1)
+        for i in range(len(valid)):
+            if valid[i]:
+                (pos, di) = self.game.decodeAction(i)
+                print("[", pos[0], pos[1], "%s(%s)" % (Board.readableDirection(di), di), end="] ")
+        while True:
+            input_move = input()
+            input_a = input_move.split(" ")
+            if len(input_a) == 3:
+                try:
+                    x, y, di = [int(i) for i in input_a]
+                    if ((0 <= x) and (x < self.game.n) and (0 <= y) and (y < self.game.n) and (0 <= di) and (di < 4)):
+                        a = self.game.encodeAction([x, y], di)
+                        if valid[a]:
+                            break
+                except ValueError:
+                    # Input needs to be an integer
+                    'Invalid integer'
+            print('Invalid move')
+        return a

no_one/__init__.py ADDED Viewed

File without changes

no_one/pytorch/NNet.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import os
+import sys
+import time
+import numpy as np
+from tqdm import tqdm
+sys.path.append('../../')
+from utils import *
+from NeuralNet import NeuralNet
+import torch
+import torch.optim as optim
+from .NoOneNNet import NoOneNNet as onnet
+args = dotdict({
+    'lr': 0.001,
+    'dropout': 0.3,
+    'epochs': 10,
+    'batch_size': 64,
+    'cuda': torch.cuda.is_available(),
+    'num_channels': 512,
+})
+class NNetWrapper(NeuralNet):
+    def __init__(self, game):
+        self.nnet = onnet(game, args)
+        self.board_x, self.board_y = game.getBoardSize()
+        self.action_size = game.getActionSize()
+        if args.cuda:
+            self.nnet.cuda()
+    def train(self, examples):
+        """
+        examples: list of examples, each example is of form (board, pi, v)
+        """
+        optimizer = optim.Adam(self.nnet.parameters())
+        for epoch in range(args.epochs):
+            print('EPOCH ::: ' + str(epoch + 1))
+            self.nnet.train()
+            pi_losses = AverageMeter()
+            v_losses = AverageMeter()
+            batch_count = int(len(examples) / args.batch_size)
+            t = tqdm(range(batch_count), desc='Training Net')
+            for _ in t:
+                sample_ids = np.random.randint(len(examples), size=args.batch_size)
+                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
+                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
+                target_pis = torch.FloatTensor(np.array(pis))
+                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))
+                # predict
+                if args.cuda:
+                    boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
+                # compute output
+                out_pi, out_v = self.nnet(boards)
+                l_pi = self.loss_pi(target_pis, out_pi)
+                l_v = self.loss_v(target_vs, out_v)
+                total_loss = l_pi + l_v
+                # record loss
+                pi_losses.update(l_pi.item(), boards.size(0))
+                v_losses.update(l_v.item(), boards.size(0))
+                t.set_postfix(Loss_pi=pi_losses, Loss_v=v_losses)
+                # compute gradient and do SGD step
+                optimizer.zero_grad()
+                total_loss.backward()
+                optimizer.step()
+    def predict(self, board):
+        """
+        board: np array with board
+        """
+        # timing
+        start = time.time()
+        # preparing input
+        board = torch.FloatTensor(board.astype(np.float64))
+        if args.cuda: board = board.contiguous().cuda()
+        board = board.view(1, self.board_x, self.board_y)
+        self.nnet.eval()
+        with torch.no_grad():
+            pi, v = self.nnet(board)
+        # print('PREDICTION TIME TAKEN : {0:03f}'.format(time.time()-start))
+        return torch.exp(pi).data.cpu().numpy()[0], v.data.cpu().numpy()[0]
+    def loss_pi(self, targets, outputs):
+        return -torch.sum(targets * outputs) / targets.size()[0]
+    def loss_v(self, targets, outputs):
+        return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]
+    def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
+        filepath = os.path.join(folder, filename)
+        if not os.path.exists(folder):
+            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
+            os.mkdir(folder)
+        else:
+            print("Checkpoint Directory exists! ")
+        torch.save({
+            'state_dict': self.nnet.state_dict(),
+        }, filepath)
+    def load_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
+        # https://github.com/pytorch/examples/blob/master/imagenet/main.py#L98
+        filepath = os.path.join(folder, filename)
+        if not os.path.exists(filepath):
+            raise ("No model in path {}".format(filepath))
+        map_location = None if args.cuda else 'cpu'
+        checkpoint = torch.load(filepath, map_location=map_location)
+        self.nnet.load_state_dict(checkpoint['state_dict'])

no_one/pytorch/NoOneNNet.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import sys
+sys.path.append('..')
+from utils import *
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+class NoOneNNet(nn.Module):
+    def __init__(self, game, args):
+        # game params
+        self.board_x, self.board_y = game.getBoardSize()
+        self.action_size = game.getActionSize()
+        self.args = args
+        super(NoOneNNet, self).__init__()
+        self.conv1 = nn.Conv2d(1, args.num_channels, 3, stride=1, padding=1)
+        self.conv2 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
+        self.conv3 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1, padding=1)
+        self.conv4 = nn.Conv2d(args.num_channels, args.num_channels, 3, stride=1)
+        self.bn1 = nn.BatchNorm2d(args.num_channels)
+        self.bn2 = nn.BatchNorm2d(args.num_channels)
+        self.bn3 = nn.BatchNorm2d(args.num_channels)
+        self.bn4 = nn.BatchNorm2d(args.num_channels)
+        self.fc1 = nn.Linear(args.num_channels*(self.board_x-2)*(self.board_y-2), 1024)
+        self.fc_bn1 = nn.BatchNorm1d(1024)
+        self.fc2 = nn.Linear(1024, 512)
+        self.fc_bn2 = nn.BatchNorm1d(512)
+        self.fc3 = nn.Linear(512, self.action_size)
+        self.fc4 = nn.Linear(512, 1)
+    def forward(self, s):
+        #                                                           s: batch_size x board_x x board_y
+        s = s.view(-1, 1, self.board_x, self.board_y)                # batch_size x 1 x board_x x board_y
+        s = F.relu(self.bn1(self.conv1(s)))                          # batch_size x num_channels x board_x x board_y
+        s = F.relu(self.bn2(self.conv2(s)))                          # batch_size x num_channels x board_x x board_y
+        s = F.relu(self.bn3(self.conv3(s)))                          # batch_size x num_channels x (board_x-2) x (board_y-2)
+        s = F.relu(self.bn4(self.conv4(s)))                          # batch_size x num_channels x (board_x-4) x (board_y-4)
+        s = s.view(-1, self.args.num_channels*(self.board_x-2)*(self.board_y-2))
+        s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), p=self.args.dropout, training=self.training)  # batch_size x 1024
+        s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), p=self.args.dropout, training=self.training)  # batch_size x 512
+        pi = self.fc3(s)                                                                         # batch_size x action_size
+        v = self.fc4(s)                                                                          # batch_size x 1
+        return F.log_softmax(pi, dim=1), torch.tanh(v)

no_one/pytorch/__init__.py ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,46 @@

+absl-py==1.1.0
+astunparse==1.6.3
+cachetools==5.2.0
+certifi==2022.5.18.1
+charset-normalizer==2.0.12
+coloredlogs==15.0.1
+flatbuffers==1.12
+gast==0.4.0
+google-auth==2.8.0
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.46.3
+h5py==3.7.0
+humanfriendly==10.0
+idna==3.3
+importlib-metadata==4.11.4
+keras==2.9.0
+Keras-Preprocessing==1.1.2
+libclang==14.0.1
+Markdown==3.3.7
+numpy
+oauthlib==3.2.0
+opt-einsum==3.3.0
+packaging==21.3
+protobuf==3.19.4
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pyparsing==3.0.9
+requests==2.28.0
+requests-oauthlib==1.3.1
+rsa==4.8
+six==1.16.0
+tensorboard==2.9.1
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.9.1
+tensorflow-estimator==2.9.0
+tensorflow-io-gcs-filesystem==0.26.0
+termcolor==1.1.0
+torch==1.11.0
+tqdm==4.64.0
+typing_extensions==4.2.0
+urllib3==1.26.9
+Werkzeug==2.1.2
+wrapt==1.14.1
+zipp==3.8.0

utils.py ADDED Viewed

	@@ -0,0 +1,22 @@

+class AverageMeter(object):
+    """From https://github.com/pytorch/examples/blob/master/imagenet/main.py"""
+    def __init__(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+    def __repr__(self):
+        return f'{self.avg:.2e}'
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count
+class dotdict(dict):
+    def __getattr__(self, name):
+        return self[name]