HaileyStorm
/

chess-mamba-vs-xformer

HaileyStorm committed on Apr 22, 2024

Commit

4b17b1c

•

1 Parent(s): be25621

Update chess-gpt-eval-contrastive/mamba_module.py

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -96,7 +96,7 @@ class MambaPlayer:
             self.activations[i] = {"won": [], "lost": []}
             def hook(module, input, output, layer_idx=i):
-                self.activations[layer_idx]["current"] = output[0]#.detach().cpu().numpy()
             hook_function = partial(hook, layer_idx=i)
             self.hooks.append(layer.register_forward_hook(hook_function))
@@ -170,30 +170,20 @@ class MambaPlayer:
     def save_activations(self, path):
         activations_sum = {}
         activations_count = {}
         for layer_idx, layer_activations in self.activations.items():
-            if layer_activations["won"]:  # Check if not empty to avoid unnecessary operations
-                won_sum = torch.stack(layer_activations["won"]).sum(dim=0).cpu().numpy()
-            else:
-                return
-            if layer_activations["lost"]:
-                lost_sum = torch.stack(layer_activations["lost"]).sum(dim=0).cpu().numpy()
-            else:
-                return
             activations_sum[layer_idx] = {
-                "won": won_sum,
-                "lost": lost_sum
             }
             activations_count[layer_idx] = {
                 "won": len(layer_activations["won"]),
                 "lost": len(layer_activations["lost"])
             }
         with open(path, "wb") as f:
             pickle.dump((activations_sum, activations_count), f)
         self.activations = {}
     def apply_contrastive_activations(self, path):

             self.activations[i] = {"won": [], "lost": []}
             def hook(module, input, output, layer_idx=i):
+                self.activations[layer_idx]["current"] = output[0].detach().cpu().numpy()
             hook_function = partial(hook, layer_idx=i)
             self.hooks.append(layer.register_forward_hook(hook_function))
     def save_activations(self, path):
         activations_sum = {}
         activations_count = {}
         for layer_idx, layer_activations in self.activations.items():
             activations_sum[layer_idx] = {
+                "won": np.sum(layer_activations["won"], axis=0),
+                "lost": np.sum(layer_activations["lost"], axis=0)
             }
             activations_count[layer_idx] = {
                 "won": len(layer_activations["won"]),
                 "lost": len(layer_activations["lost"])
             }
         with open(path, "wb") as f:
             pickle.dump((activations_sum, activations_count), f)
         self.activations = {}
     def apply_contrastive_activations(self, path):