# File size: 8,302 Bytes

import torch
import torch.nn as nn
import torch.optim as optim
import chess
import os
import chess.engine as eng
import torch.multiprocessing as mp
import random
from pathlib import Path

# CONFIGURATION
def _pick_device():
    """Return the best available torch device: MPS, then CUDA, then CPU."""
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


CONFIG = {
    # Stockfish binary; the STOCKFISH_PATH environment variable overrides the
    # default location so the script is not tied to one machine.
    "stockfish_path": os.environ.get(
        "STOCKFISH_PATH",
        "/Users/aaronvattay/Downloads/stockfish/stockfish-macos-m1-apple-silicon",
    ),
    # Checkpoint written after training on each game.
    "model_path": "chessy_model.pth",
    # Previous-state checkpoint, also used as a fallback when loading.
    "backup_model_path": "chessy_modelt-1.pth",
    # MPS when available (the original target), otherwise CUDA or CPU.
    "device": _pick_device(),
    # Adam learning rate.
    "learning_rate": 1e-4,
    # Games per epoch, split evenly across the three play modes.
    "num_games": 30,
    "num_epochs": 10,
    # Seconds Stockfish may think per move.
    "stockfish_time_limit": 1.0,
    # Search depth used when the model scores candidate moves.
    "search_depth": 1,
    # Upper bound on how many candidate moves are sampled per model turn.
    "epsilon": 4,
}

device = CONFIG["device"]

def board_to_tensor(board):
    """Encode ``board`` as a ``(1, 64)`` long tensor of piece codes.

    Squares are visited in ``chess.SQUARES`` order. An empty square is 0,
    white P, N, B, R, Q, K map to 1-6 and black p, n, b, r, q, k to 7-12.
    """
    code_by_symbol = {
        symbol: code for code, symbol in enumerate("PNBRQKpnbrqk", start=1)
    }
    occupants = (board.piece_at(square) for square in chess.SQUARES)
    codes = [
        code_by_symbol[occupant.symbol()] if occupant else 0
        for occupant in occupants
    ]
    # Leading dimension of size 1 is added before returning.
    return torch.tensor(codes, dtype=torch.long).unsqueeze(0)

class NN1(nn.Module):
    """Board evaluation network: embedding, self-attention, then an MLP.

    Input is a long tensor of per-square piece codes (values 0-12); the
    MLP's first layer expects 64 squares x 64 embedding dims = 4096 features.
    The network produces 4 outputs per board.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(13, 64)
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=16)
        self.neu = 512

        # Layers are created in the same order as a literal listing would,
        # so Sequential indices (and therefore state_dict keys) are stable.
        width = self.neu
        layers = [nn.Linear(4096, width), nn.ReLU()]
        for _ in range(12):
            layers.append(nn.Linear(width, width))
            layers.append(nn.ReLU())
        layers.extend([nn.Linear(width, 64), nn.ReLU(), nn.Linear(64, 4)])
        self.neurons = nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of encoded boards to a ``(batch, 4)`` output tensor."""
        embedded = self.embedding(x)
        # The attention layer is constructed with its default layout, which
        # takes the sequence dimension first, so swap batch and sequence.
        seq_first = embedded.permute(1, 0, 2)
        attended, _ = self.attention(seq_first, seq_first, seq_first)
        batch_first = attended.permute(1, 0, 2).contiguous()
        flattened = batch_first.view(batch_first.size(0), -1)
        return self.neurons(flattened)

model = NN1().to(device)
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

# Restore weights from the primary checkpoint, falling back to the backup;
# if neither file exists the freshly initialised weights are kept.
_checkpoints = (
    (CONFIG["model_path"], f"Loaded model from {CONFIG['model_path']}"),
    (CONFIG["backup_model_path"], f"Loaded backup model from {CONFIG['backup_model_path']}"),
)
for _checkpoint_path, _loaded_message in _checkpoints:
    try:
        model.load_state_dict(torch.load(_checkpoint_path, map_location=device))
    except FileNotFoundError:
        continue
    print(_loaded_message)
    break
else:
    print("No model file found, starting from scratch.")

model.train()
criterion = nn.MSELoss()
# Stockfish process and per-move time limit used for the engine's moves.
engine = eng.SimpleEngine.popen_uci(CONFIG["stockfish_path"])
lim = eng.Limit(time=CONFIG["stockfish_time_limit"])

def get_evaluation(board):
    """
    Score ``board`` for the player whose turn it is.

    The first model output is read as a White-perspective score, so it is
    returned unchanged on White's turn and negated on Black's.
    """
    position = board_to_tensor(board).to(device)
    with torch.no_grad():
        white_score = model(position)[0][0].item()

    return white_score if board.turn == chess.WHITE else -white_score

def _negamax(board, depth, alpha, beta):
    """Return the negamax score of ``board`` for the side to move.

    Leaves (``depth <= 0`` or a finished game) are scored with
    ``get_evaluation``; interior nodes use alpha-beta pruning. The board is
    mutated with push/pop during the search and restored before returning.
    """
    # `<= 0` rather than `== 0`: a non-positive depth must stop immediately
    # instead of recursing until the game ends.
    if depth <= 0 or board.is_game_over():
        return get_evaluation(board)

    best = float('-inf')
    # Snapshot the moves because the board is mutated inside the loop.
    for move in list(board.legal_moves):
        board.push(move)
        score = -_negamax(board, depth - 1, -beta, -alpha)
        board.pop()
        best = max(best, score)
        alpha = max(alpha, score)
        if alpha >= beta:
            break
    return best


def search(board, depth, simulations=100):
    """Pick a move for the side to move by random sampling plus negamax.

    ``simulations`` times, a legal move is drawn uniformly at random, played,
    and the resulting position is scored with a ``depth - 1`` negamax search.
    Scores are averaged per move over the number of times that move was
    actually drawn.

    Args:
        board: position to search; restored to its original state on return.
        depth: search depth in plies, counting the sampled move itself.
        simulations: number of random draws.

    Returns:
        The sampled move with the highest mean score, or ``None`` when there
        are no legal moves or ``simulations`` is less than 1.
    """
    # NOTE: this function used to read ChessEnv/san_moves.txt, but the SAN
    # round trip applied to listed moves always reproduced the sampled move,
    # so the file never influenced the result and is no longer read.
    candidates = list(board.legal_moves)
    if not candidates or simulations < 1:
        return None

    totals = {}
    visits = {}
    for _ in range(simulations):
        move = random.choice(candidates)
        board.push(move)
        score = -_negamax(board, depth - 1, float('-inf'), float('inf'))
        board.pop()
        totals[move] = totals.get(move, 0.0) + score
        visits[move] = visits.get(move, 0) + 1

    # Average over each move's own visit count; dividing by the total number
    # of simulations would bias the choice by how often a move was drawn.
    return max(totals, key=lambda m: totals[m] / visits[m])


def game_gen(engine_side):
    """Play one game and record the positions in which the model moved.

    Args:
        engine_side: colour played by Stockfish (``chess.WHITE`` or
            ``chess.BLACK``). With ``None`` no turn matches, so the model
            plays both sides.

    Returns:
        ``(data, c, mc)`` where ``data`` is a list of
        ``{'fen', 'move_number'}`` dicts for the model's turns, ``c`` is
        10.0 for a White win, -10.0 for a Black win and 0.0 otherwise, and
        ``mc`` is the total number of moves played.
    """
    data = []
    mc = 0
    board = chess.Board()
    while not board.is_game_over():
        is_bot_turn = board.turn != engine_side

        if is_bot_turn:
            evaling = {}
            for candidate in list(board.legal_moves):
                board.push(candidate)
                # After the push it is the opponent's turn, so negate the
                # score to express it for the side choosing the move.
                evaling[candidate] = -_negamax(
                    board, CONFIG["search_depth"], float('-inf'), float('inf')
                )
                board.pop()

            if not evaling:
                break

            keys = list(evaling)
            logits = torch.tensor(list(evaling.values()), dtype=torch.float32).to(device)
            probs = torch.softmax(logits, dim=0)
            # Sample up to CONFIG["epsilon"] distinct candidates weighted by
            # score, then play the best-scoring one among those sampled.
            epsilon = min(CONFIG["epsilon"], len(keys))
            bests = torch.multinomial(probs, num_samples=epsilon, replacement=False)
            best_idx = bests[torch.argmax(logits[bests])]
            move = keys[best_idx.item()]

            # Record the position as it stood before the model's move.
            data.append({
                'fen': board.fen(),
                'move_number': mc,
            })
        else:
            move = engine.play(board, lim).move

        board.push(move)
        mc += 1

    outcome = board.result()
    if outcome == '1-0':
        c = 10.0
    elif outcome == '0-1':
        c = -10.0
    else:
        c = 0.0
    return data, c, mc
def train(data, c, mc):
    """Run one optimizer step per recorded position, then save the model.

    Args:
        data: iterable of dicts with ``'fen'`` and ``'move_number'`` keys.
        c: game outcome value used to build the regression target.
        mc: total move count of the game; the target for a position is
            ``c * move_number / mc``.
    """
    for record in data:
        position = board_to_tensor(chess.Board(record['fen'])).to(device)
        scaled_outcome = c * record['move_number'] / mc
        label = torch.tensor(scaled_outcome, dtype=torch.float32).to(device)
        # Only the first of the model's outputs is trained.
        prediction = model(position)[0][0]
        loss = criterion(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Saving model to {CONFIG['model_path']}")
    torch.save(model.state_dict(), CONFIG["model_path"])
def main():
    """Run the game-generation and training loop for all configured epochs.

    Each epoch plays ``CONFIG['num_games'] // 3`` games in each of three
    modes (``game_gen`` called with ``None``, ``chess.WHITE`` and
    ``chess.BLACK``) in a process pool, then trains on every game in turn.
    The backup checkpoint is written at the start of each epoch and again
    before each game is trained on. The module-level Stockfish engine is
    shut down on exit, including when an error interrupts training.
    """
    # The start method is process-wide, so set it once rather than per epoch.
    mp.set_start_method('spawn', force=True)
    games_per_mode = CONFIG['num_games'] // 3
    num_instances = mp.cpu_count()

    try:
        for _ in range(CONFIG["num_epochs"]):
            print(f"Saving backup model to {CONFIG['backup_model_path']}")
            torch.save(model.state_dict(), CONFIG["backup_model_path"])

            with mp.Pool(processes=num_instances) as pool:
                results_self = pool.starmap(game_gen, [(None,)] * games_per_mode)
                results_white = pool.starmap(game_gen, [(chess.WHITE,)] * games_per_mode)
                results_black = pool.starmap(game_gen, [(chess.BLACK,)] * games_per_mode)

            # All games are finished here, so the workers are released before
            # training. Interleave the three modes game by game.
            results = [
                game
                for trio in zip(results_self, results_white, results_black)
                for game in trio
            ]
            for data, c, mc in results:
                print(f"Saving backup model to {CONFIG['backup_model_path']}")
                torch.save(model.state_dict(), CONFIG["backup_model_path"])
                if data:
                    train(data, c, mc)
        print("Training complete.")
    finally:
        # Always stop the Stockfish subprocess, even if training failed.
        engine.quit()


if __name__ == "__main__":
    main()