NeoChess-Community / selfchess-colab.py

Update selfchess-colab.py

f87211d verified 15 days ago

7.08 kB

	import os
	os.system('pip install chess')
	import torch
	import torch.nn as nn
	import torch.optim as optim
	import chess
	import os
	import chess.engine as eng
	import torch.multiprocessing as mp

	# CONFIGURATION
	# Central settings for self-play game generation and training.
	CONFIG = {
	    # Path to the Stockfish binary launched as the UCI sparring engine.
	    "stockfish_path": "/usr/games/stockfish",
	    # Primary checkpoint: loaded at start-up and overwritten after training.
	    "model_path": "NeoChess-Community/chessy_model.pth",
	    # Fallback checkpoint, tried when the primary file is missing.
	    "backup_model_path": "NeoChess-Community/chessy_modelt-1.pth",
	    # Prefer the GPU, but fall back to the CPU so the script still runs on
	    # machines without CUDA instead of failing on the first `.to(device)`.
	    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
	    # Step size handed to the Adam optimizer.
	    "learning_rate": 1e-4,
	    # Games per epoch; split in thirds between self-play, engine-as-White
	    # and engine-as-Black games.
	    "num_games": 30,
	    "num_epochs": 10,
	    # Thinking time given to Stockfish for each of its moves.
	    "stockfish_time_limit": 1.0,
	    # Depth of the model's own search below each candidate move.
	    "search_depth": 1,
	    # Number of candidate moves sampled per turn; the best-scoring sampled
	    # candidate is the one that gets played.
	    "epsilon": 4,
	}

	device = CONFIG["device"]

	def board_to_tensor(board):
	    """Encode a board as a ``(1, 64)`` long tensor of piece codes.

	    Each entry is indexed by the python-chess square number and holds
	    0 for an empty square, 1-6 for white P, N, B, R, Q, K and 7-12 for
	    the black pieces in the same order.

	    Args:
	        board: a ``chess.Board`` (anything exposing ``piece_map()``).

	    Returns:
	        ``torch.LongTensor`` of shape ``(1, 64)``.
	    """
	    symbol_to_code = {
	        symbol: code for code, symbol in enumerate("PNBRQKpnbrqk", start=1)
	    }

	    # Start from all-empty squares and only fill in the occupied ones.
	    codes = torch.zeros(64, dtype=torch.long)
	    for square, piece in board.piece_map().items():
	        codes[square] = symbol_to_code[piece.symbol()]

	    return codes.unsqueeze(0)

	class NN1(nn.Module):
	    """Position network: piece embedding -> self-attention -> deep MLP.

	    ``forward`` takes a ``(batch, 64)`` long tensor of piece codes in
	    ``0..12`` and returns a ``(batch, 4)`` float tensor.
	    """

	    def __init__(self):
	        super().__init__()
	        self.embedding = nn.Embedding(13, 64)
	        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=16)
	        self.neu = 512

	        # 64 squares x 64 embedding dims are flattened into 4096 inputs.
	        layers = [nn.Linear(4096, self.neu), nn.ReLU()]
	        # Twelve identical hidden blocks.
	        for _ in range(12):
	            layers.extend((nn.Linear(self.neu, self.neu), nn.ReLU()))
	        layers.extend((nn.Linear(self.neu, 64), nn.ReLU(), nn.Linear(64, 4)))
	        self.neurons = nn.Sequential(*layers)

	    def forward(self, x):
	        embedded = self.embedding(x)
	        # The attention layer is sequence-first: (squares, batch, embed).
	        seq_first = embedded.transpose(0, 1)
	        attended, _ = self.attention(seq_first, seq_first, seq_first)
	        # Back to batch-first, then flatten every square into one vector.
	        flat = attended.transpose(0, 1).reshape(x.size(0), -1)
	        return self.neurons(flat)

	# Module-level training state: network, optimizer, loss and sparring engine.
	# NOTE(review): this runs at import time; with the 'spawn' start method used
	# in main(), worker processes presumably re-import the module and each build
	# their own model and engine - confirm that this is intended.
	model = NN1().to(device)
	optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])


	def _restore_weights(path):
	    """Load the state dict at ``path`` into ``model``.

	    Returns ``True`` on success and ``False`` when the file does not exist.
	    """
	    try:
	        model.load_state_dict(torch.load(path, map_location=device))
	    except FileNotFoundError:
	        return False
	    return True


	# Prefer the primary checkpoint, then the backup, else keep the fresh weights.
	if _restore_weights(CONFIG["model_path"]):
	    print(f"Loaded model from {CONFIG['model_path']}")
	elif _restore_weights(CONFIG["backup_model_path"]):
	    print(f"Loaded backup model from {CONFIG['backup_model_path']}")
	else:
	    print("No model file found, starting from scratch.")

	model.train()
	criterion = nn.MSELoss()
	engine = eng.SimpleEngine.popen_uci(CONFIG["stockfish_path"])
	lim = eng.Limit(time=CONFIG["stockfish_time_limit"])

	def get_evaluation(board):
	    """Score ``board`` for the side that is about to move.

	    The first network output is treated as a White-relative score, so the
	    sign is flipped when it is Black's turn.

	    Args:
	        board: the ``chess.Board`` to evaluate.

	    Returns:
	        The score as a Python float, relative to the side to move.
	    """
	    position = board_to_tensor(board).to(device)
	    # Pure inference: no gradients are needed while searching.
	    with torch.no_grad():
	        white_score = model(position)[0][0].item()

	    return white_score if board.turn == chess.WHITE else -white_score

	def search(board, depth, alpha, beta):
	    """Negamax search with alpha-beta pruning.

	    Args:
	        board: position to search; moves are pushed and popped in place.
	        depth: remaining plies to look ahead.
	        alpha: best score already guaranteed for the side to move.
	        beta: score at which the opponent would avoid this line.

	    Returns:
	        The best score found, relative to the side to move on ``board``.
	    """
	    if depth == 0 or board.is_game_over():
	        return get_evaluation(board)

	    best_score = float('-inf')
	    for move in board.legal_moves:
	        board.push(move)
	        # The child score belongs to the opponent, hence the negation and
	        # the swapped, negated window.
	        score = -search(board, depth - 1, -beta, -alpha)
	        board.pop()

	        if score > best_score:
	            best_score = score
	        if score > alpha:
	            alpha = score
	        # Cut-off: the opponent already has a better option elsewhere.
	        if alpha >= beta:
	            break

	    return best_score




	def _choose_bot_move(board):
	    """Pick the model's move for the side to move on ``board``.

	    Every legal move is scored with ``search``. A handful of candidates
	    (``CONFIG["epsilon"]`` of them) is then sampled without replacement from
	    the softmax of the scores, and the best-scoring candidate is returned.

	    Returns:
	        The chosen move, or ``None`` when there is no legal move.
	    """
	    scored = {}
	    for move in board.legal_moves:
	        board.push(move)
	        # search() scores the position for the opponent, who is now to move,
	        # so negate to get the value from the mover's point of view.
	        scored[move] = -search(
	            board,
	            depth=CONFIG["search_depth"],
	            alpha=float('-inf'),
	            beta=float('inf'),
	        )
	        board.pop()

	    if not scored:
	        return None

	    moves = list(scored)
	    logits = torch.tensor(list(scored.values())).to(device)
	    probs = torch.softmax(logits, dim=0)

	    # Sampling without replacement needs at least as many non-zero
	    # probabilities as samples. Softmax can underflow to exact zeros when
	    # scores are far apart, so clamp the sample count to what is drawable.
	    drawable = int((probs > 0).sum().item())
	    num_candidates = min(CONFIG["epsilon"], drawable)

	    candidates = torch.multinomial(
	        probs, num_samples=num_candidates, replacement=False
	    )
	    best_idx = candidates[torch.argmax(logits[candidates])]
	    return moves[best_idx.item()]


	def game_gen(engine_side):
	    """Play one game and record the positions in which the model moved.

	    Args:
	        engine_side: colour played by Stockfish (``chess.WHITE`` or
	            ``chess.BLACK``). With ``None`` no turn ever matches, so the
	            model plays both sides (self-play).

	    Returns:
	        A ``(data, c, mc)`` tuple: ``data`` is a list of
	        ``{'fen', 'move_number'}`` dicts captured before each model move,
	        ``c`` is ``10.0`` if White won, ``-10.0`` if Black won and ``0``
	        otherwise, and ``mc`` is the number of plies played.
	    """
	    data = []
	    mc = 0
	    board = chess.Board()

	    while not board.is_game_over():
	        if board.turn != engine_side:
	            move = _choose_bot_move(board)
	            if move is None:
	                break
	            # Record the position as the model saw it, before its move.
	            data.append({
	                'fen': board.fen(),
	                'move_number': mc,
	            })
	        else:
	            move = engine.play(board, lim).move

	        board.push(move)
	        mc += 1

	    # Map the result string onto a White-relative training signal.
	    c = {'1-0': 10.0, '0-1': -10.0}.get(board.result(), 0)
	    return data, c, mc
	def train(data, c, mc):
	    """Fit the model on one game's recorded positions, then save it.

	    Each position is one optimizer step on the first network output. Its
	    target is ``c * move_number / mc``: the game outcome scaled by how far
	    into the game the position occurred.

	    Args:
	        data: list of ``{'fen', 'move_number'}`` dicts from ``game_gen``.
	        c: outcome signal of the game (positive when White won).
	        mc: total number of plies in the game; must be non-zero when
	            ``data`` is not empty.
	    """
	    for entry in data:
	        tensor = board_to_tensor(chess.Board(entry['fen'])).to(device)
	        target = torch.tensor(
	            c * entry['move_number'] / mc, dtype=torch.float32
	        ).to(device)

	        optimizer.zero_grad()
	        output = model(tensor)[0][0]
	        loss = criterion(output, target)
	        loss.backward()
	        optimizer.step()

	    # torch.save does not create missing directories. Make sure the target
	    # folder exists so a run that started from scratch can still write its
	    # first checkpoint.
	    checkpoint_dir = os.path.dirname(CONFIG["model_path"])
	    if checkpoint_dir:
	        os.makedirs(checkpoint_dir, exist_ok=True)

	    print(f"Saving model to {CONFIG['model_path']}")
	    torch.save(model.state_dict(), CONFIG["model_path"])
	def main():
	    """Run the self-play / training loop for ``CONFIG["num_epochs"]`` epochs.

	    Each epoch saves a backup checkpoint, generates games in a process pool
	    (one third self-play, one third with the engine as White, one third with
	    the engine as Black) and trains on every game in turn. The engine is
	    shut down on exit, including when an error interrupts training.

	    NOTE(review): the original indentation was lost; "Training complete."
	    and the engine shutdown are assumed to run once, after all epochs.
	    """
	    # The start method is process-wide, so set it once up front.
	    mp.set_start_method('spawn', force=True)
	    num_instances = mp.cpu_count()
	    games_per_mode = CONFIG['num_games'] // 3

	    # torch.save does not create missing directories.
	    backup_dir = os.path.dirname(CONFIG["backup_model_path"])
	    if backup_dir:
	        os.makedirs(backup_dir, exist_ok=True)

	    try:
	        for _ in range(CONFIG["num_epochs"]):
	            print(f"Saving backup model to {CONFIG['backup_model_path']}")
	            torch.save(model.state_dict(), CONFIG["backup_model_path"])

	            with mp.Pool(processes=num_instances) as pool:
	                results_self = pool.starmap(
	                    game_gen, [(None,) for _ in range(games_per_mode)]
	                )
	                results_white = pool.starmap(
	                    game_gen, [(chess.WHITE,) for _ in range(games_per_mode)]
	                )
	                results_black = pool.starmap(
	                    game_gen, [(chess.BLACK,) for _ in range(games_per_mode)]
	                )

	            # Interleave the three game types so training alternates
	            # between them rather than seeing one type at a time.
	            for trio in zip(results_self, results_white, results_black):
	                for data, c, mc in trio:
	                    # Keep a copy of the pre-update weights for every game.
	                    print(f"Saving backup model to {CONFIG['backup_model_path']}")
	                    torch.save(model.state_dict(), CONFIG["backup_model_path"])
	                    if data:
	                        train(data, c, mc)

	        print("Training complete.")
	    finally:
	        # Always stop the Stockfish process, even if training failed.
	        engine.quit()


	# Only start training when run as a script. Worker processes started with
	# 'spawn' re-import this module and must not call main() again.
	if __name__ == "__main__":
	    main()