HaileyStorm
/

chess-mamba-vs-xformer

Model card Files Files and versions Community

HaileyStorm committed on May 2, 2024

Commit

0c33a38

verified ·

1 Parent(s): b2567ad

Update chess-gpt-eval-contrastive/mamba_module.py

Browse files

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py +7 -7

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -223,9 +223,9 @@ class MambaPlayer:
     def update_activations(self, result):
         for layer_idx in self.activations_sum:
             if result == "reset":
-                self.activations_sum[layer_idx] = {bucket: {"won": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                    "lost": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                    "current": np.zeros((1, self.max_seq_len, self.model.config.d_model))}
                                            for bucket in self.move_buckets}
                 self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                              for bucket in self.move_buckets}
@@ -254,7 +254,7 @@ class MambaPlayer:
                     activations_count[layer_idx][bucket] = {}
                 for category in ["won", "lost"]:
                     if category not in activations_sum[layer_idx][bucket]:
-                        activations_sum[layer_idx][bucket][category] = np.zeros((1, self.max_seq_len, self.model.config.d_model))
                         activations_count[layer_idx][bucket][category] = 0
                     activations_sum[layer_idx][bucket][category] += self.activations_sum[layer_idx][bucket][category]
@@ -264,9 +264,9 @@ class MambaPlayer:
             pickle.dump((activations_sum, activations_count), f)
         for layer_idx in self.activations_sum:
-            self.activations_sum[layer_idx] = {bucket: {"won": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                "lost": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                "current": np.zeros((1, self.max_seq_len, self.model.config.d_model))}
                                        for bucket in self.move_buckets}
             self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                          for bucket in self.move_buckets}

     def update_activations(self, result):
         for layer_idx in self.activations_sum:
             if result == "reset":
+                self.activations_sum[layer_idx] = {bucket: {"won": np.zeros((1, 8, self.model.config.d_model)),
+                                                    "lost": np.zeros((1, 8, self.model.config.d_model)),
+                                                    "current": np.zeros((1, 8, self.model.config.d_model))}
                                            for bucket in self.move_buckets}
                 self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                              for bucket in self.move_buckets}
                     activations_count[layer_idx][bucket] = {}
                 for category in ["won", "lost"]:
                     if category not in activations_sum[layer_idx][bucket]:
+                        activations_sum[layer_idx][bucket][category] = np.zeros((1, 8, self.model.config.d_model))
                         activations_count[layer_idx][bucket][category] = 0
                     activations_sum[layer_idx][bucket][category] += self.activations_sum[layer_idx][bucket][category]
             pickle.dump((activations_sum, activations_count), f)
         for layer_idx in self.activations_sum:
+            self.activations_sum[layer_idx] = {bucket: {"won": np.zeros((1, 8, self.model.config.d_model)),
+                                                "lost": np.zeros((1, 8, self.model.config.d_model)),
+                                                "current": np.zeros((1, 8, self.model.config.d_model))}
                                        for bucket in self.move_buckets}
             self.activations_count[layer_idx] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                          for bucket in self.move_buckets}