# NeoChess-Community / selfchess.py
# sigmoidneuron123's picture
# Update selfchess.py
# a8711ff verified
# raw
# history blame
# 8.3 kB
import torch
import torch.nn as nn
import torch.optim as optim
import chess
import os
import chess.engine as eng
import torch.multiprocessing as mp
import random
from pathlib import Path
# CONFIGURATION
# Default engine binary. It is a machine-specific absolute path, so it can be
# overridden with the STOCKFISH_PATH environment variable.
_DEFAULT_STOCKFISH_PATH = "/Users/aaronvattay/Downloads/stockfish/stockfish-macos-m1-apple-silicon"


def _pick_device():
    """Return the MPS device when available, otherwise CUDA, otherwise CPU."""
    # Older torch builds have no ``torch.backends.mps`` attribute at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


CONFIG = {
    # Path handed to the UCI engine launcher.
    "stockfish_path": os.environ.get("STOCKFISH_PATH") or _DEFAULT_STOCKFISH_PATH,
    # Primary checkpoint (state dict) and the backup copy.
    "model_path": "chessy_model.pth",
    "backup_model_path": "chessy_modelt-1.pth",
    # Device the model and all tensors are moved to.
    "device": _pick_device(),
    # Adam learning rate.
    "learning_rate": 1e-4,
    # Games per epoch; split into three equal groups (integer division by 3).
    "num_games": 30,
    "num_epochs": 10,
    # Seconds the engine is given per move.
    "stockfish_time_limit": 1.0,
    # Negamax depth used when scoring the model's candidate moves.
    "search_depth": 1,
    # Number of candidate moves sampled before the best of them is played.
    "epsilon": 4,
}
device = CONFIG["device"]
def board_to_tensor(board):
    """Encode a board as a ``(1, 64)`` long tensor of per-square piece codes.

    Empty squares are 0; white pieces P, N, B, R, Q, K map to 1-6 and black
    pieces p, n, b, r, q, k map to 7-12. Position ``i`` along the second axis
    holds the code for python-chess square ``i``.
    """
    symbol_codes = {
        symbol: code for code, symbol in enumerate("PNBRQKpnbrqk", start=1)
    }
    codes = [0] * 64
    # Only occupied squares need a write; the rest keep the "empty" code 0.
    for square, piece in board.piece_map().items():
        codes[square] = symbol_codes[piece.symbol()]
    return torch.tensor(codes, dtype=torch.long).unsqueeze(0)
class NN1(nn.Module):
    """Board network: piece embedding -> self-attention -> deep ReLU MLP.

    ``forward`` takes a ``(batch, 64)`` long tensor of piece codes in
    ``[0, 12]`` and returns a ``(batch, 4)`` float tensor.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(13, 64)
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=16)
        self.neu = 512
        width = self.neu

        # The layer order and count are fixed: the positions inside the
        # Sequential become the state_dict keys ("neurons.0.weight", ...),
        # so changing them would break loading of saved checkpoints.
        stack = [nn.Linear(4096, width), nn.ReLU()]  # 4096 = 64 squares * 64 dims
        for _ in range(12):
            stack.extend((nn.Linear(width, width), nn.ReLU()))
        stack.extend((nn.Linear(width, 64), nn.ReLU(), nn.Linear(64, 4)))
        self.neurons = nn.Sequential(*stack)

    def forward(self, x):
        embedded = self.embedding(x)
        # The attention layer is not batch_first, so feed it (seq, batch, dim).
        seq_first = embedded.transpose(0, 1)
        attended, _ = self.attention(seq_first, seq_first, seq_first)
        batch_first = attended.transpose(0, 1).contiguous()
        flattened = batch_first.view(batch_first.size(0), -1)
        return self.neurons(flattened)
model = NN1().to(device)
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])


def _load_checkpoint(path):
    """Load the state dict at ``path`` into ``model``; return False if the file is missing.

    Only ``FileNotFoundError`` is tolerated; any other failure propagates.
    """
    try:
        model.load_state_dict(torch.load(path, map_location=device))
    except FileNotFoundError:
        return False
    return True


# Prefer the primary checkpoint, fall back to the backup, else keep the
# freshly initialised weights.
if _load_checkpoint(CONFIG["model_path"]):
    print(f"Loaded model from {CONFIG['model_path']}")
elif _load_checkpoint(CONFIG["backup_model_path"]):
    print(f"Loaded backup model from {CONFIG['backup_model_path']}")
else:
    print("No model file found, starting from scratch.")

model.train()
criterion = nn.MSELoss()
# The engine process is started at import time, so every process that imports
# this module gets its own engine instance.
engine = eng.SimpleEngine.popen_uci(CONFIG["stockfish_path"])
lim = eng.Limit(time=CONFIG["stockfish_time_limit"])
def get_evaluation(board):
    """Score ``board`` for the side to move using the network.

    The first output of the model is treated as a White-perspective score,
    so it is returned unchanged when White is to move and negated when Black
    is to move. No gradients are recorded.
    """
    with torch.no_grad():
        white_score = model(board_to_tensor(board).to(device))[0][0].item()
    return white_score if board.turn == chess.WHITE else -white_score
def search(board, depth, simulations=100):
    """Pick a move for the side to move by sampling and scoring root moves.

    Up to ``simulations`` root moves are drawn uniformly at random (with
    replacement) from the legal moves. Each distinct sampled move is scored
    once with a negamax search (alpha-beta pruning) of ``depth - 1`` further
    plies, using ``get_evaluation`` at the leaves, and the sampled move with
    the highest score is returned.

    Args:
        board: Position to search. It is pushed/popped during the search and
            is left in its original state on return.
        depth: Total search depth in plies, counting the root move. Values
            below 1 are treated as 1 (the root move is always played).
        simulations: Number of random root-move draws.

    Returns:
        The chosen move, or ``None`` when ``board`` has no legal moves.

    Raises:
        ValueError: If ``simulations`` is less than 1.
    """
    def negamax(node, plies, alpha, beta):
        # ``<= 0`` (not ``== 0``) so a negative remaining depth cannot
        # recurse all the way to the end of the game.
        if plies <= 0 or node.is_game_over():
            return get_evaluation(node)
        best = float('-inf')
        for reply in list(node.legal_moves):
            node.push(reply)
            value = -negamax(node, plies - 1, -beta, -alpha)
            node.pop()
            best = max(best, value)
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return best

    candidates = list(board.legal_moves)
    if not candidates:
        return None
    if simulations < 1:
        raise ValueError("simulations must be at least 1")

    # NOTE: an earlier san_moves.txt lookup was removed here. It converted the
    # sampled move to SAN and parsed it straight back into the same move, so
    # it never influenced the result while re-reading the file on every call.

    # Each sampled move is scored once and ranked by its own score. Summing
    # repeated samples and dividing by the global simulation count (as was
    # done before) ranks moves by how often they happened to be drawn.
    move_scores = {}
    for _ in range(simulations):
        move = random.choice(candidates)
        if move in move_scores:
            continue
        board.push(move)
        try:
            move_scores[move] = -negamax(
                board, max(depth - 1, 0), -float('inf'), float('inf')
            )
        finally:
            board.pop()

    return max(move_scores, key=move_scores.get)
def _negamax_score(board, depth, alpha, beta):
    """Return the negamax value of ``board`` for the side to move (alpha-beta pruned)."""
    if depth <= 0 or board.is_game_over():
        return get_evaluation(board)
    best = float('-inf')
    for reply in list(board.legal_moves):
        board.push(reply)
        value = -_negamax_score(board, depth - 1, -beta, -alpha)
        board.pop()
        best = max(best, value)
        alpha = max(alpha, value)
        if alpha >= beta:
            break
    return best


def _choose_model_move(board):
    """Score every legal move and return the best of a softmax-sampled subset.

    Returns ``None`` when there are no legal moves.
    """
    moves = list(board.legal_moves)
    if not moves:
        return None
    scores = []
    for move in moves:
        board.push(move)
        # The score after the move is from the opponent's view; negate it.
        scores.append(
            -_negamax_score(board, CONFIG["search_depth"], float('-inf'), float('inf'))
        )
        board.pop()
    logits = torch.tensor(scores).to(device)
    probs = torch.softmax(logits, dim=0)
    # Draw up to CONFIG["epsilon"] distinct candidates, then play the
    # highest-scoring one among them (adds exploration to greedy play).
    sample_size = min(CONFIG["epsilon"], len(moves))
    sampled = torch.multinomial(probs, num_samples=sample_size, replacement=False)
    best_idx = sampled[torch.argmax(logits[sampled])]
    return moves[best_idx.item()]


def game_gen(engine_side):
    """Play one game and record the positions in which the model moved.

    Args:
        engine_side: ``chess.WHITE`` or ``chess.BLACK`` for the colour played
            by the UCI engine, or ``None`` to let the model play both sides
            (``board.turn != None`` is always true).

    Returns:
        A tuple ``(data, c, mc)``: ``data`` is a list of
        ``{'fen', 'move_number'}`` dicts, one per model move, taken before the
        move was played; ``c`` is ``10.0`` for a White win, ``-10.0`` for a
        Black win and ``0`` otherwise; ``mc`` is the number of plies played.
    """
    # The previous version called search(board, depth=..., alpha=..., beta=...)
    # and negated the result, but search() accepts no alpha/beta and returns a
    # move, so every model turn raised TypeError. Scoring now uses the local
    # negamax helper above.
    data = []
    mc = 0
    board = chess.Board()
    while not board.is_game_over():
        if board.turn != engine_side:
            move = _choose_model_move(board)
            if move is None:
                break
            data.append({
                'fen': board.fen(),
                'move_number': mc,
            })
        else:
            move = engine.play(board, lim).move
            if move is None:
                # The engine produced no move; stop instead of pushing None.
                break
        board.push(move)
        mc += 1

    c = {'1-0': 10.0, '0-1': -10.0}.get(board.result(), 0)
    return data, c, mc
def train(data, c, mc):
    """Run one optimizer step per recorded position, then save the model.

    Args:
        data: List of dicts with ``'fen'`` and ``'move_number'`` keys.
        c: Game outcome value used to build the regression target.
        mc: Total number of plies in the game; the target for a position is
            ``c * move_number / mc``, so later positions get targets closer
            to ``c``.
    """
    for sample in data:
        position = chess.Board(sample['fen'])
        inputs = board_to_tensor(position).to(device)
        scaled_outcome = c * sample['move_number'] / mc
        target = torch.tensor(scaled_outcome, dtype=torch.float32).to(device)

        # Only the first output of the network is trained.
        prediction = model(inputs)[0][0]
        optimizer.zero_grad()
        loss = criterion(prediction, target)
        loss.backward()
        optimizer.step()

    print(f"Saving model to {CONFIG['model_path']}")
    torch.save(model.state_dict(), CONFIG["model_path"])
def main():
    """Generate games in worker processes and train on them, epoch by epoch.

    Each epoch plays ``CONFIG['num_games'] // 3`` games in each of three
    modes (model vs. itself, engine as White, engine as Black) in a process
    pool, then trains on the games one at a time, interleaving the three
    modes. The backup checkpoint is rewritten before the games are generated
    and again before each game is trained on. The module-level engine is shut
    down on exit, including when an error occurs.
    """
    # The start method is process-wide, so set it once instead of per epoch.
    mp.set_start_method('spawn', force=True)
    games_per_mode = CONFIG['num_games'] // 3
    num_instances = mp.cpu_count()

    try:
        for _ in range(CONFIG["num_epochs"]):
            print(f"Saving backup model to {CONFIG['backup_model_path']}")
            torch.save(model.state_dict(), CONFIG["backup_model_path"])

            # NOTE(review): with 'spawn' each worker re-imports this module
            # and so starts its own engine process; nothing here shuts those
            # down explicitly - confirm they exit with the pool.
            with mp.Pool(processes=num_instances) as pool:
                results_self = pool.starmap(game_gen, [(None,)] * games_per_mode)
                results_white = pool.starmap(game_gen, [(chess.WHITE,)] * games_per_mode)
                results_black = pool.starmap(game_gen, [(chess.BLACK,)] * games_per_mode)

            for game_triple in zip(results_self, results_white, results_black):
                for data, c, mc in game_triple:
                    print(f"Saving backup model to {CONFIG['backup_model_path']}")
                    torch.save(model.state_dict(), CONFIG["backup_model_path"])
                    if data:
                        train(data, c, mc)
        print("Training complete.")
    finally:
        # Always stop the engine subprocess, even if an epoch fails.
        engine.quit()


# The guard is required with the 'spawn' start method: workers re-import this
# module and must not re-enter main().
if __name__ == "__main__":
    main()