Update chess-gpt-eval-contrastive/mamba_module.py
chess-gpt-eval-contrastive/mamba_module.py
CHANGED
@@ -145,6 +145,8 @@ class MambaPlayer:
             for layer_idx in self.linear_probes
         }
         wandb.init(project="mamba_linear_probes", name=f"mamba_linear_probes")
+        self.wandb_step = 0
+        self.linear_save_ct = 0

     def get_mamba_response(self, game_state: str, temperature: float, max_new_tokens: int, top_k: int):
         game_state = game_state.split("\n\n")[-1].strip()
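The two counters added in this hunk back the explicit step= arguments used by the logging calls further down: wandb_step advances once per train_linear_probes call, and linear_save_ct counts saved games, so both metric families share one x-axis. Note that when wandb.log is given an explicit step, wandb requires the steps to be monotonically non-decreasing and drops data logged against an earlier step. A minimal standalone sketch of the pattern (mode="offline" is added only so the snippet runs without credentials; it is not part of this commit):

import wandb

run = wandb.init(project="mamba_linear_probes", name="mamba_linear_probes", mode="offline")

wandb_step = 0
for _ in range(3):
    wandb_step += 1                               # one tick per training call
    wandb.log({"etc/lr": 0.01}, step=wandb_step)  # explicit steps must never decrease
wandb.log({"etc/games": 1}, step=wandb_step)      # re-logging at the current step is fine
run.finish()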
@@ -317,6 +319,13 @@ class MambaPlayer:

     def train_linear_probes(self):
         criterion = nn.MSELoss()
+        self.wandb_step += 1
+        decay_iters = 2000 * 40
+        learning_rate = 0.01
+        min_lr = 0.0001
+
+        coeff = 0.5 * (1.0 + math.cos(math.pi * min(self.wandb_step / decay_iters, 1.0)))
+        lr = min_lr + coeff * (learning_rate - min_lr)

         for layer_idx in self.linear_probes:
             for bucket in self.move_buckets:
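The added lines implement a standard cosine decay from learning_rate down to min_lr over decay_iters training calls, clamped at min_lr afterwards; the schedule relies on an "import math" at the top of the file, which this diff does not show. A standalone sketch of the same schedule, with the commit's constants lifted into defaults:

import math

def cosine_lr(step: int, learning_rate: float = 0.01,
              min_lr: float = 0.0001, decay_iters: int = 2000 * 40) -> float:
    """Cosine decay from learning_rate to min_lr, flat at min_lr past decay_iters."""
    coeff = 0.5 * (1.0 + math.cos(math.pi * min(step / decay_iters, 1.0)))
    return min_lr + coeff * (learning_rate - min_lr)

# Endpoints: full rate at step 0, floor once decay_iters is reached.
assert abs(cosine_lr(0) - 0.01) < 1e-12
assert abs(cosine_lr(2000 * 40) - 0.0001) < 1e-12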
@@ -327,16 +336,25 @@
                     if len(y) > 0:
                         y_pred = self.linear_probes[layer_idx][probe_type](X)
                         loss = criterion(y_pred, y)
+                        for param_group in self.linear_optimizers[layer_idx][probe_type].param_groups:
+                            param_group['lr'] = lr
                         self.linear_optimizers[layer_idx][probe_type].zero_grad()
                         loss.backward()
                         self.linear_optimizers[layer_idx][probe_type].step()
                         #wandb.log({f"{probe_type}/layer_{layer_idx}_{bucket}_loss": loss.item()})
-                        wandb.log({
+                        wandb.log({
+                            "etc/lr": lr,
+                            f"{probe_type}/layer_{layer_idx}_loss": loss.item()
+                        }, step=self.wandb_step)

         # Reset linear_probe_targets after training
         self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}

     def save_linear_probe_data(self, path):
+        self.linear_save_ct += 1
+        wandb.log({
+            "etc/games": self.linear_save_ct
+        }, step=self.wandb_step)
         torch.save(self.linear_probes, path)

     def evaluate_linear_probes(self, board: chess.Board, game_state: str):
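Because every probe has its own optimizer, there is no single scheduler object to step; the schedule is applied by writing the computed lr into each optimizer's param_groups right before the update. A minimal sketch of that PyTorch pattern (the probe and optimizer here are illustrative stand-ins, not objects from this file):

import torch
import torch.nn as nn

probe = nn.Linear(512, 1)  # stands in for one linear probe head
optimizer = torch.optim.Adam(probe.parameters(), lr=0.01)

def apply_lr(optimizer: torch.optim.Optimizer, lr: float) -> None:
    # Same in-place update the diff performs before zero_grad()/step().
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

apply_lr(optimizer, 0.0005)
assert all(group['lr'] == 0.0005 for group in optimizer.param_groups)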