HaileyStorm
/

chess-mamba-vs-xformer

Model card Files and versions Community

HaileyStorm committed on May 1, 2024

Commit

bf84c14

verified ·

1 Parent(s): 498c07f

Update chess-gpt-eval-contrastive/mamba_module.py

Browse files

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py +19 -7

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -320,14 +320,26 @@ class MambaPlayer:
             self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
     def train_linear_probes(self):
         criterion = nn.MSELoss()
         self.wandb_step += 1
-        decay_iters = 2000 * 43
-        learning_rate = 0.00025
-        min_lr = 0.000025
-        coeff = 0.5 * (1.0 + math.cos(math.pi * min(self.wandb_step / decay_iters, 1.0)))
-        lr = min_lr + coeff * (learning_rate - min_lr)
         for layer_idx in self.linear_probes:
             for bucket in self.move_buckets:
@@ -367,7 +379,7 @@ class MambaPlayer:
             for probe_type in ['q_value', 'q_value_delta', 'material_balance']:
                 target = torch.tensor(self.linear_probe_targets[layer_idx][bucket][probe_type]).float().item()
                 probe = self.linear_probes[layer_idx][probe_type]
-                probe.eval()
                 prediction = probe(X).item()
                 print(f"Layer {layer_idx}, {probe_type}: {prediction} vs {target}")
         self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}

             self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
     def train_linear_probes(self):
+        def get_lr(it):
+            warmup_iters = 25 * 43
+            lr_decay_iters = 5000 * 43
+            learning_rate = 0.025
+            min_lr = 0.0001
+            # 1) linear warmup for warmup_iters steps
+            if it < warmup_iters:
+                return learning_rate * it / warmup_iters
+            # 2) if it > lr_decay_iters, return min learning rate
+            if it > lr_decay_iters:
+                return min_lr
+            # 3) in between, use cosine decay down to min learning rate
+            decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
+            assert 0 <= decay_ratio <= 1
+            coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio)) # coeff ranges 0..1
+            return min_lr + coeff * (learning_rate - min_lr)
         criterion = nn.MSELoss()
         self.wandb_step += 1
+        lr = get_lr(self.wandb_step)
         for layer_idx in self.linear_probes:
             for bucket in self.move_buckets:
             for probe_type in ['q_value', 'q_value_delta', 'material_balance']:
                 target = torch.tensor(self.linear_probe_targets[layer_idx][bucket][probe_type]).float().item()
                 probe = self.linear_probes[layer_idx][probe_type]
+                #probe.eval()
                 prediction = probe(X).item()
                 print(f"Layer {layer_idx}, {probe_type}: {prediction} vs {target}")
         self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}