HaileyStorm
/

chess-mamba-vs-xformer

HaileyStorm committed on May 3, 2024

Commit

0fbfa94

verified ·

1 Parent(s): 6b2d08d

Update chess-gpt-eval-contrastive/mamba_module.py

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -126,7 +126,7 @@ class MambaPlayer:
                         tensor_output = output
                     seq_len = tensor_output.shape[1]
                     bucket = next(b for b in self.move_buckets if self.move_num <= b)
-                    self.activations_sum[layer_idx][bucket]["current"][:, :8, :] += tensor_output.detach().cpu().numpy()[:, :self.seq_len, :][:, -8:, :]
                     self.activations_count[layer_idx][bucket]["current"] += 1
                 self.hooks.append(layer.register_forward_hook(hook))
@@ -323,9 +323,9 @@ class MambaPlayer:
     def train_linear_probes(self):
         def get_lr(it):
-            warmup_iters = 300 * 43
             lr_decay_iters = 5000 * 43
-            learning_rate = 0.0003
             min_lr = 0.00001
             # 1) linear warmup for warmup_iters steps
             if it < warmup_iters:

                         tensor_output = output
                     seq_len = tensor_output.shape[1]
                     bucket = next(b for b in self.move_buckets if self.move_num <= b)
+                    self.activations_sum[layer_idx][bucket]["current"][:, :8, :] += tensor_output.detach().cpu().numpy()[:, :max(self.seq_len, 8), :][:, -8:, :]
                     self.activations_count[layer_idx][bucket]["current"] += 1
                 self.hooks.append(layer.register_forward_hook(hook))
     def train_linear_probes(self):
         def get_lr(it):
+            warmup_iters = 0 #300 * 43
             lr_decay_iters = 5000 * 43
+            learning_rate = 0.000265
             min_lr = 0.00001
             # 1) linear warmup for warmup_iters steps
             if it < warmup_iters: