ChessAI-Community
/

NeoChess-Community

@@ -87,8 +87,43 @@ class NN1(nn.Module):
         x = self.neurons(x)
         return x
 model = NN1().to(device)
 optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
 try:
     model.load_state_dict(torch.load(CONFIG["model_path"], map_location=device))
@@ -119,56 +154,41 @@ def get_evaluation(board):
     else:
         return -evaluation
-def search(board, depth, simulations=100):
     """
-    Monte Carlo Tree Search with basic negamax evaluation.
-    Reads moves from san_moves.txt and picks the best move.
     """
-    # Load predefined moves from SAN file
-    san_file_path = Path("ChessEnv/san_moves.txt")
-    if san_file_path.exists():
-        with open(san_file_path, "r") as f:
-            san_moves = [line.strip() for line in f if line.strip()]
-    else:
-        san_moves = []
-    def negamax(board, depth, alpha, beta):
-        if depth == 0 or board.is_game_over():
-            return get_evaluation(board)
-        max_eval = float('-inf')
-        for move in board.legal_moves:
             board.push(move)
-            eval = -negamax(board, depth - 1, -beta, -alpha)
             board.pop()
-            max_eval = max(max_eval, eval)
-            alpha = max(alpha, eval)
-            if alpha >= beta:
-                break
-        return max_eval
-    move_scores = {}
-    for _ in range(simulations):
-        # Optionally start from a random legal move (Monte Carlo rollout)
-        move = random.choice(list(board.legal_moves))
-        # If san_moves.txt is not empty, prefer moves from it when possible
-        move_san = board.san(move)
-        if san_moves and move_san in san_moves:
-            move = chess.Move.from_uci(board.parse_san(move_san).uci())
-        board.push(move)
-        score = -negamax(board, depth - 1, -float('inf'), float('inf'))
-        board.pop()
-        move_scores[move] = move_scores.get(move, 0) + score
-    # Pick move with highest average score
-    best_move = max(move_scores.items(), key=lambda x: x[1] / simulations)[0]
-    return best_move

         x = self.neurons(x)
         return x
+class Policy(nn.Module):
+    """Policy network that turns a chess position into one logit per move index.
+
+    The position is tokenised by ``board_to_tensor``, embedded, passed through
+    a single self-attention layer, flattened, and fed to an MLP head that
+    produces 29275 outputs.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # 13 distinct token ids, each mapped to a 32-dim vector (presumably
+        # 12 piece kinds plus "empty" -- confirm against board_to_tensor).
+        self.embedding = nn.Embedding(13, 32)
+        # embed_dim 32 split across 16 heads leaves 2 dimensions per head.
+        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=16)
+        self.neu = 256
+        # The layer order inside this Sequential fixes the state-dict keys
+        # ("neurons.0", "neurons.2", ...), so it must match saved checkpoints.
+        self.neurons = nn.Sequential(
+            nn.Linear(64 * 32, self.neu),
+            nn.ReLU(),
+            nn.Linear(self.neu, self.neu),
+            nn.ReLU(),
+            nn.Linear(self.neu, self.neu),
+            nn.ReLU(),
+            nn.Linear(self.neu, self.neu),
+            nn.ReLU(),
+            nn.Linear(self.neu, 128),
+            nn.ReLU(),
+            nn.Linear(128, 29275),
+        )
+
+    def forward(self, x):
+        """Return the move logits for a position.
+
+        Args:
+            x: Position in a form accepted by ``chess.Board(...)``, i.e. a
+                FEN string.
+
+        Returns:
+            Tensor whose last dimension has 29275 entries, one per move index.
+        """
+        board = chess.Board(x)
+        color = board.turn
+        # assumes board_to_tensor yields integer ids shaped (batch, 64) --
+        # TODO confirm; the first Linear layer needs 64 * 32 features per row.
+        tokens = board_to_tensor(board)
+        embedded = self.embedding(tokens)
+        # nn.MultiheadAttention defaults to (sequence, batch, embed) layout.
+        sequence_first = embedded.permute(1, 0, 2)
+        attn_output, _ = self.attention(sequence_first, sequence_first, sequence_first)
+        batch_first = attn_output.permute(1, 0, 2).contiguous()
+        flat = batch_first.view(batch_first.size(0), -1)
+        # NOTE(review): chess.Board.turn is a bool in python-chess, so this
+        # multiplies by 1 on White's move and by 0 on Black's move, which
+        # zeroes every logit for Black. Kept as-is because the shipped
+        # weights may have been trained with it -- confirm it is intended.
+        return self.neurons(flat) * color
 model = NN1().to(device)
 optimizer = optim.Adam(model.parameters(), lr=CONFIG["learning_rate"])
+policy = Policy().to(device)  # policy network instance, moved to the configured device
+polweight = torch.load("NeoChess/chessy_policy.pth",map_location=device,weights_only=False)  # NOTE(review): path is relative to the working directory, and weights_only=False allows arbitrary unpickling -- load trusted checkpoints only
+policy.load_state_dict(polweight)  # runs at import time; fails here if the checkpoint does not match Policy's layers
 try:
     model.load_state_dict(torch.load(CONFIG["model_path"], map_location=device))
     else:
         return -evaluation
+def _san_moves_path():
+    """Return the path of the ``san_moves.txt`` move list bundled with torchrl.
+
+    The file is searched for inside whichever torchrl installation the
+    running interpreter can see. If that fails, the path this module
+    originally hard-coded is returned so existing setups keep working.
+    """
+    # Function-scope imports keep this block self-contained.
+    import importlib.util
+    import os
+
+    legacy_path = "/usr/local/python/3.12.1/lib/python3.12/site-packages/torchrl/envs/custom/san_moves.txt"
+    try:
+        # find_spec locates the package on disk without importing it.
+        spec = importlib.util.find_spec("torchrl")
+    except (ImportError, ValueError):
+        spec = None
+    roots = []
+    if spec is not None and spec.submodule_search_locations:
+        roots = list(spec.submodule_search_locations)
+    for root in roots:
+        candidate = os.path.join(root, "envs", "custom", "san_moves.txt")
+        if os.path.isfile(candidate):
+            return candidate
+    return legacy_path
+
+
+# Maps each stripped line of the move list to its 0-based line number.
+# NOTE(review): the file name suggests the entries are SAN strings even
+# though the table is called uci_to_index -- confirm which notation it holds.
+with open(_san_moves_path(), "r", encoding="utf-8") as f:
+    uci_to_index = {line.strip(): i for i, line in enumerate(f)}
+def search(board, depth, policy_net=policy, simulations=100, temperature=1.0, device="cpu"):
     """
+    Pick a move by scoring every legal move with get_evaluation() and
+    scaling each score by the policy network's probability for that move.
+
+    Args:
+        board: python-chess board to search from. Candidate moves are pushed
+            and popped on it, so it is left in its original state.
+        depth: Unused; kept so existing callers keep working.
+        policy_net: Callable that takes a FEN string and returns the move
+            logits, either as a tensor or as a mapping with a "logits" entry.
+        simulations: How many times each move is evaluated; the evaluations
+            are averaged. Must be at least 1.
+        temperature: Softmax temperature applied to the logits. Must be
+            positive.
+        device: Unused; kept so existing callers keep working.
+
+    Returns:
+        A ``(best_move, move_scores)`` tuple, where ``move_scores`` maps each
+        legal move to its prior-weighted score. ``(None, {})`` is returned
+        when the position has no legal moves.
+
+    Raises:
+        ValueError: If ``simulations`` is below 1 or ``temperature`` is not
+            positive.
+        KeyError: If a legal move has no entry in ``uci_to_index``.
     """
+    if simulations < 1:
+        raise ValueError("simulations must be at least 1")
+    if temperature <= 0:
+        raise ValueError("temperature must be positive")
+    # Snapshot the moves: the board is pushed and popped while we loop.
+    legal_moves = list(board.legal_moves)
+    if not legal_moves:
+        return None, {}
+    with torch.no_grad():
+        # The FEN string goes to the network unchanged; a tensor cannot hold
+        # a string, and the policy network builds the board from the FEN.
+        output = policy_net(board.fen())
+        logits = output if torch.is_tensor(output) else output["logits"]
+        logits = logits.squeeze(0)
+        probs = torch.softmax(logits / temperature, dim=-1).cpu().numpy()
+    move_scores = {}
+    for move in legal_moves:
+        total_eval = 0
+        # NOTE(review): the position is the same on every iteration, so the
+        # average equals a single evaluation unless get_evaluation is
+        # stochastic -- confirm the repetition is needed.
+        for _ in range(simulations):
             board.push(move)
+            # NOTE(review): evaluated with the opponent to move; if
+            # get_evaluation scores from the side to move, negate it here.
+            total_eval += get_evaluation(board)
             board.pop()
+        move_scores[move] = total_eval / simulations
+    # Weight evaluations by policy prior.
+    # NOTE(review): multiplying a negative score by a small prior pulls it
+    # toward zero, which ranks unlikely bad moves above likely bad moves --
+    # confirm this weighting is intended.
+    for move in legal_moves:
+        # Try the UCI text first, then SAN, since the table is built from a
+        # file named san_moves.txt.
+        move_index = uci_to_index.get(str(move))
+        if move_index is None:
+            move_index = uci_to_index.get(board.san(move))
+        if move_index is None:
+            raise KeyError(f"move {move} has no entry in the move-index table")
+        move_scores[move] *= probs[move_index]
+    # Pick best move
+    best_move = max(move_scores, key=move_scores.get)
+    return best_move, move_scores