HaileyStorm
/

chess-mamba-vs-xformer

Model card Files Files and versions Community

HaileyStorm committed on May 2, 2024

Commit

4560751

verified ·

1 Parent(s): d29de63

Update chess-gpt-eval-contrastive/mamba_module.py

Browse files

Files changed (1) hide show

chess-gpt-eval-contrastive/mamba_module.py +5 -4

chess-gpt-eval-contrastive/mamba_module.py CHANGED Viewed

@@ -112,9 +112,9 @@ class MambaPlayer:
         if update_contrastive or update_linear:
             linear_size = self.model.config.d_model * 8 #self.model.config.d_model * self.max_seq_len
             for i, layer in enumerate(self.model.backbone.layers):
-                self.activations_sum[i] = {bucket: {"won": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                    "lost": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
-                                                    "current": np.zeros((1, self.max_seq_len, self.model.config.d_model))}
                                            for bucket in self.move_buckets}
                 self.activations_count[i] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                              for bucket in self.move_buckets}
@@ -157,6 +157,7 @@ class MambaPlayer:
         # Tokenize the game state
         encoded_prompt = self.encode(game_state)
         input_ids = torch.tensor([encoded_prompt], dtype=torch.long, device=self.device)
         self.model.eval()  # Set the model to evaluation mode
         with torch.no_grad():
@@ -183,8 +184,8 @@ class MambaPlayer:
                 else:
                     have_non_space = True
                 input_ids = torch.cat([input_ids, next_token_id.unsqueeze(0)], dim=1)
-        self.seq_len = input_ids[0].size(dim=0)
         model_response = self.decode(input_ids[0].tolist())
         model_response = model_response[len(game_state):].split(";")[0]
         return model_response

         if update_contrastive or update_linear:
             linear_size = self.model.config.d_model * 8 #self.model.config.d_model * self.max_seq_len
             for i, layer in enumerate(self.model.backbone.layers):
+                self.activations_sum[i] = {bucket: {"won": np.zeros((1, 8, self.model.config.d_model)),
+                                                    "lost": np.zeros((1, 8, self.model.config.d_model)),
+                                                    "current": np.zeros((1, 8, self.model.config.d_model))}
                                            for bucket in self.move_buckets}
                 self.activations_count[i] = {bucket: {"won": 0, "lost": 0, "current": 0}
                                              for bucket in self.move_buckets}
         # Tokenize the game state
         encoded_prompt = self.encode(game_state)
         input_ids = torch.tensor([encoded_prompt], dtype=torch.long, device=self.device)
+        self.seq_len = input_ids[0].size(dim=0)
         self.model.eval()  # Set the model to evaluation mode
         with torch.no_grad():
                 else:
                     have_non_space = True
                 input_ids = torch.cat([input_ids, next_token_id.unsqueeze(0)], dim=1)
+                self.seq_len += 1
         model_response = self.decode(input_ids[0].tolist())
         model_response = model_response[len(game_state):].split(";")[0]
         return model_response