HaileyStorm
/

chess-mamba-vs-xformer

HaileyStorm committed on Apr 23, 2024

Commit

9082726

verified ·

1 Parent(s): ae3133e

Update chess-gpt-eval-contrastive/mamba_module.py

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -222,7 +222,7 @@ class MambaPlayer:
             self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                          for bucket in self.move_buckets}
-    def apply_contrastive_activations(self, path):
         if os.path.exists(path):
             with open(path, "rb") as f:
                 activations_sum, activations_count = pickle.load(f)
@@ -232,7 +232,7 @@ class MambaPlayer:
                 won_activations = activations_sum[layer_idx][bucket]["won"] / activations_count[layer_idx][bucket]["won"]
                 lost_activations = activations_sum[layer_idx][bucket]["lost"] / activations_count[layer_idx][bucket]["lost"]
                 contrastive_activations = won_activations - lost_activations
-                return output + torch.from_numpy(contrastive_activations[:, :seq_len, :]).to(output.device)
             for layer_idx in activations_sum:
                 for bucket in self.move_buckets:

             self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                          for bucket in self.move_buckets}
+    def apply_contrastive_activations(self, path, weight=1.0):
         if os.path.exists(path):
             with open(path, "rb") as f:
                 activations_sum, activations_count = pickle.load(f)
                 won_activations = activations_sum[layer_idx][bucket]["won"] / activations_count[layer_idx][bucket]["won"]
                 lost_activations = activations_sum[layer_idx][bucket]["lost"] / activations_count[layer_idx][bucket]["lost"]
                 contrastive_activations = won_activations - lost_activations
+                return output + torch.from_numpy(contrastive_activations[:, :seq_len, :]).to(output.device) * weight
             for layer_idx in activations_sum:
                 for bucket in self.move_buckets: