# NeoChess-Community / selfchess-colab.py
# sigmoidneuron123's picture
# Update selfchess-colab.py
# f87211d verified
import os
os.system('pip install chess')
import torch
import torch.nn as nn
import torch.optim as optim
import chess
import os
import chess.engine as eng
import torch.multiprocessing as mp
# CONFIGURATION
# Central settings read by the rest of this script.
CONFIG = {
    # Path of the Stockfish binary launched through chess.engine.
    "stockfish_path": "/usr/games/stockfish",
    # Primary checkpoint: loaded at start-up and rewritten by train().
    "model_path": "NeoChess-Community/chessy_model.pth",
    # Fallback checkpoint: tried when the primary is missing, written by main().
    "backup_model_path": "NeoChess-Community/chessy_modelt-1.pth",
    # Prefer the GPU, but fall back to the CPU so the script still starts on
    # hosts without CUDA instead of failing when the model is moved to device.
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # Learning rate handed to the Adam optimiser.
    "learning_rate": 1e-4,
    # Games per epoch; main() plays num_games // 3 games for each opponent mode.
    "num_games": 30,
    # Number of generate-then-train rounds run by main().
    "num_epochs": 10,
    # Thinking time per Stockfish move, passed to chess.engine.Limit(time=...).
    "stockfish_time_limit": 1.0,
    # Depth passed to search() for every candidate move.
    "search_depth": 1,
    # Upper bound on how many candidate moves game_gen() samples per turn.
    "epsilon": 4,
}
device = CONFIG["device"]
def board_to_tensor(board):
    """Encode the piece placement of ``board`` as a ``(1, 64)`` long tensor.

    Squares are visited in ``chess.SQUARES`` order.  An empty square is
    encoded as 0, the white pieces P, N, B, R, Q, K as 1-6 and the black
    pieces p, n, b, r, q, k as 7-12.  Only piece placement is read from the
    board.
    """
    symbol_to_code = {
        symbol: code for code, symbol in enumerate("PNBRQKpnbrqk", start=1)
    }
    # Starts as all zeros, so empty squares need no explicit assignment.
    encoded = torch.zeros(64, dtype=torch.long)
    for sq in chess.SQUARES:
        occupant = board.piece_at(sq)
        if occupant:
            encoded[sq] = symbol_to_code[occupant.symbol()]
    # Add a leading batch dimension of size one.
    return encoded.unsqueeze(0)
class NN1(nn.Module):
    """Board evaluator: piece embedding, self-attention, then a deep MLP.

    ``forward`` takes a ``(batch, 64)`` long tensor of piece codes in
    ``[0, 12]`` and returns a ``(batch, 4)`` float tensor.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(13, 64)
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=16)
        self.neu = 512
        # Widths of the hidden stack: 64 squares x 64 embedding dims = 4096
        # inputs, thirteen layers of width ``neu``, then a 64-wide layer.
        widths = [4096] + [self.neu] * 13 + [64]
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Final projection has no activation after it.
        stack.append(nn.Linear(64, 4))
        self.neurons = nn.Sequential(*stack)

    def forward(self, x):
        embedded = self.embedding(x)
        # The attention layer is built without batch_first, so it is fed
        # (sequence, batch, embedding) and its output is swapped back after.
        seq_first = embedded.permute(1, 0, 2)
        attended = self.attention(seq_first, seq_first, seq_first)[0]
        batch_first = attended.permute(1, 0, 2).contiguous()
        # Flatten every square's features into one vector per sample.
        flat = batch_first.view(batch_first.size(0), -1)
        return self.neurons(flat)
# Build the network and its optimiser on the configured device.
model = NN1().to(device)
optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])

# Restore weights: the primary checkpoint is tried first, then the backup.
# A missing file moves on to the next candidate; any other error propagates.
for _label, _key in (("model", "model_path"), ("backup model", "backup_model_path")):
    try:
        model.load_state_dict(torch.load(CONFIG[_key], map_location=device))
    except FileNotFoundError:
        continue
    print(f"Loaded {_label} from {CONFIG[_key]}")
    break
else:
    # Neither checkpoint exists: keep the freshly initialised weights.
    print("No model file found, starting from scratch.")

model.train()
criterion = nn.MSELoss()

# Stockfish process and per-move time budget used for the engine's moves.
engine = eng.SimpleEngine.popen_uci(CONFIG["stockfish_path"])
lim = eng.Limit(time=CONFIG["stockfish_time_limit"])
def get_evaluation(board):
    """
    Score ``board`` for the side to move using the network.

    The first network output is treated as a score from White's point of
    view, so it is returned unchanged when White is to move and negated
    when Black is to move.
    """
    encoded = board_to_tensor(board).to(device)
    # Inference only: no gradient tracking is needed here.
    with torch.no_grad():
        white_score = model(encoded)[0][0].item()
    return white_score if board.turn == chess.WHITE else -white_score
def search(board, depth, alpha, beta):
    """
    Negamax search with alpha-beta pruning.

    Returns the best score reachable within ``depth`` plies, from the point
    of view of the side to move on ``board``.  Leaf positions (depth
    exhausted or game over) are scored with ``get_evaluation``.  ``board``
    is modified during the search but every pushed move is popped again.
    """
    if depth == 0 or board.is_game_over():
        return get_evaluation(board)

    best = float('-inf')
    for candidate in board.legal_moves:
        board.push(candidate)
        # The child score is from the opponent's view: negate it and swap
        # and negate the window.
        score = -search(board, depth - 1, -beta, -alpha)
        board.pop()
        if score > best:
            best = score
        if score > alpha:
            alpha = score
        if alpha >= beta:
            # The opponent already has a better alternative; prune the rest.
            break
    return best
def _choose_bot_move(board):
    """Pick the network's move for the side to move on ``board``.

    Every legal move is scored with ``search``.  Up to ``CONFIG["epsilon"]``
    distinct moves are then sampled using softmax probabilities over those
    scores, and the highest-scoring sampled move is returned.  Returns
    ``None`` when the position has no legal moves.
    """
    scores = {}
    for move in board.legal_moves:
        board.push(move)
        # search() scores the position for the opponent, who is now to move,
        # so the sign is flipped to get the mover's point of view.
        scores[move] = -search(
            board,
            depth=CONFIG["search_depth"],
            alpha=float('-inf'),
            beta=float('inf'),
        )
        board.pop()
    if not scores:
        return None

    candidates = list(scores)
    logits = torch.tensor(list(scores.values())).to(device)
    probs = torch.softmax(logits, dim=0)
    # Sampling without replacement needs at least ``num_samples`` non-zero
    # probabilities.  Softmax can underflow to exactly zero for moves scored
    # far below the best one, so the sample count is capped accordingly.
    drawable = int((probs > 0).sum().item())
    num_samples = min(CONFIG["epsilon"], drawable)
    sampled = torch.multinomial(probs, num_samples=num_samples, replacement=False)
    best_idx = sampled[torch.argmax(logits[sampled])]
    return candidates[best_idx.item()]


def game_gen(engine_side):
    """Play one game and record the positions in which the network moved.

    Args:
        engine_side: Colour played by Stockfish (``chess.WHITE`` or
            ``chess.BLACK``).  With ``None`` the side to move never equals
            ``engine_side``, so the network plays both colours.

    Returns:
        A tuple ``(data, c, mc)``:
        ``data`` is a list of dicts with the keys ``'fen'`` (position before
        the network's move) and ``'move_number'`` (number of moves already
        played at that point);
        ``c`` is ``10.0`` if White won, ``-10.0`` if Black won, ``0``
        otherwise;
        ``mc`` is the total number of moves played in the game.
    """
    data = []
    mc = 0
    board = chess.Board()
    while not board.is_game_over():
        is_bot_turn = board.turn != engine_side
        if is_bot_turn:
            move = _choose_bot_move(board)
            if move is None:
                break
            # Record the position before the move is played.
            data.append({
                'fen': board.fen(),
                'move_number': mc,
            })
        else:
            move = engine.play(board, lim).move
        board.push(move)
        mc += 1

    outcome = board.result()
    c = 0
    if outcome == '1-0':
        c = 10.0
    elif outcome == '0-1':
        c = -10.0
    return data, c, mc
def train(data, c, mc):
    """Fit the network on one game's recorded positions, then checkpoint it.

    One optimiser step is taken per entry of ``data``.  The regression
    target of an entry is ``c * move_number / mc``, compared with the first
    network output through ``criterion``.  After the last entry the weights
    are written to ``CONFIG["model_path"]``.
    """
    for sample in data:
        position = chess.Board(sample['fen'])
        inputs = board_to_tensor(position).to(device)
        label = torch.tensor(
            c * sample['move_number'] / mc,
            dtype=torch.float32,
        ).to(device)
        prediction = model(inputs)[0][0]
        loss = criterion(prediction, label)
        # Clear gradients left over from the previous sample before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Saving model to {CONFIG['model_path']}")
    torch.save(model.state_dict(), CONFIG["model_path"])
def main():
    """Run the generate-then-train loop for ``CONFIG["num_epochs"]`` epochs.

    Each epoch saves a backup checkpoint, then plays ``num_games // 3``
    games in each of three modes in a process pool: network against itself,
    Stockfish as White, and Stockfish as Black.  The games are then
    interleaved and the network is trained on them one at a time, with a
    backup checkpoint saved before each one.  The module-level Stockfish
    engine is shut down when the loop ends, including when it ends with an
    exception.
    """
    # These do not change between epochs, so they are set up once.
    mp.set_start_method('spawn', force=True)
    num_instances = mp.cpu_count()
    games_per_mode = CONFIG['num_games'] // 3

    try:
        for _ in range(CONFIG["num_epochs"]):
            print(f"Saving backup model to {CONFIG['backup_model_path']}")
            torch.save(model.state_dict(), CONFIG["backup_model_path"])

            with mp.Pool(processes=num_instances) as pool:
                results_self = pool.starmap(
                    game_gen, [(None,)] * games_per_mode)
                results_white = pool.starmap(
                    game_gen, [(chess.WHITE,)] * games_per_mode)
                results_black = pool.starmap(
                    game_gen, [(chess.BLACK,)] * games_per_mode)

            # Interleave the three modes so training alternates between them.
            results = []
            for s, w, b in zip(results_self, results_white, results_black):
                results.extend([s, w, b])

            for data, c, mc in results:
                print(f"Saving backup model to {CONFIG['backup_model_path']}")
                torch.save(model.state_dict(), CONFIG["backup_model_path"])
                # A game in which the network never moved has nothing to learn from.
                if data:
                    train(data, c, mc)
        print("Training complete.")
    finally:
        # Always stop the Stockfish subprocess, even if an epoch failed.
        engine.quit()


if __name__ == "__main__":
    main()