HaileyStorm committed on
Commit
a73c8da
1 Parent(s): abd7c69

Update chess-gpt-eval-contrastive/mamba_module.py

Browse files
chess-gpt-eval-contrastive/mamba_module.py CHANGED
@@ -101,10 +101,16 @@ class MambaPlayer:
101
  self.activations_count = {}
102
  if update_linear:
103
  if linear_probe_path and os.path.exists(linear_probe_path):
104
- with open(linear_probe_path, 'rb') as f:
105
- self.linear_probes = pickle.load(f)
106
  else:
107
  self.linear_probes = {}
 
 
 
 
 
 
 
108
  if update_contrastive or update_linear:
109
  for i, layer in enumerate(self.model.backbone.layers):
110
  self.activations_sum[i] = {bucket: {"won": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
@@ -128,9 +134,9 @@ class MambaPlayer:
128
  if update_linear:
129
  if not linear_probe_path or not os.path.exists(linear_probe_path):
130
  self.linear_probes[i] = {
131
- 'q_value': LinearRegression(),
132
- 'q_value_delta': LinearRegression(),
133
- 'material_balance': LinearRegression()
134
  }
135
  if update_linear:
136
  self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
@@ -304,27 +310,27 @@ class MambaPlayer:
304
  self.linear_probe_targets[layer_idx][bucket]['q_value_delta'].append(q_value_delta)
305
  self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
306
 
307
- def train_linear_probes(self):
 
 
308
  for layer_idx in self.linear_probes:
309
  for bucket in self.move_buckets:
310
  if self.activations_count[layer_idx][bucket]['current'] > 0:
311
- X = self.activations_sum[layer_idx][bucket]['current'] / self.activations_count[layer_idx][bucket]['current']
312
- X = X.reshape(X.shape[1], -1) # Reshape X to have shape (sequence_length, flattened_features)
313
  for probe_type in ['q_value', 'q_value_delta', 'material_balance']:
314
- y = np.array(self.linear_probe_targets[layer_idx][bucket][probe_type])
315
  if len(y) > 0:
316
- # Repeat X to match the number of target values
317
- X_repeated = np.repeat(X, len(y), axis=0)
318
- self.linear_probes[layer_idx][probe_type].fit(X_repeated, y)
319
- else:
320
- print(f"Skipping training for layer {layer_idx}, bucket {bucket}, probe type {probe_type} due to empty target values.")
321
-
322
  # Reset linear_probe_targets after training
323
  self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
324
 
325
  def save_linear_probe_data(self, path):
326
- with open(path, 'wb') as f:
327
- pickle.dump(self.linear_probes, f)
328
 
329
  def evaluate_linear_probes(self, board: chess.Board, game_state: str):
330
  self.move_num = game_state.count('.')
 
101
  self.activations_count = {}
102
  if update_linear:
103
  if linear_probe_path and os.path.exists(linear_probe_path):
104
+ self.linear_probes = torch.load(linear_probe_path)
 
105
  else:
106
  self.linear_probes = {}
107
+ self.linear_optimizers = {
108
+ layer_idx: {
109
+ probe_type: optim.Adam(self.linear_probes[layer_idx][probe_type].parameters(), lr=lr)
110
+ for probe_type in ['q_value', 'q_value_delta', 'material_balance']
111
+ }
112
+ for layer_idx in self.linear_probes
113
+ }
114
  if update_contrastive or update_linear:
115
  for i, layer in enumerate(self.model.backbone.layers):
116
  self.activations_sum[i] = {bucket: {"won": np.zeros((1, self.max_seq_len, self.model.config.d_model)),
 
134
  if update_linear:
135
  if not linear_probe_path or not os.path.exists(linear_probe_path):
136
  self.linear_probes[i] = {
137
+ 'q_value': nn.Linear(self.model.config.d_model, 1),
138
+ 'q_value_delta': nn.Linear(self.model.config.d_model, 1),
139
+ 'material_balance': nn.Linear(self.model.config.d_model, 1)
140
  }
141
  if update_linear:
142
  self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
 
310
  self.linear_probe_targets[layer_idx][bucket]['q_value_delta'].append(q_value_delta)
311
  self.linear_probe_targets[layer_idx][bucket]['material_balance'].append(material_bal)
312
 
313
+ def train_linear_probes(self, lr=0.01):
314
+ criterion = nn.MSELoss()
315
+
316
  for layer_idx in self.linear_probes:
317
  for bucket in self.move_buckets:
318
  if self.activations_count[layer_idx][bucket]['current'] > 0:
319
+ X = torch.from_numpy(self.activations_sum[layer_idx][bucket]['current'] #/ self.activations_count[layer_idx][bucket]['current']).float()
 
320
  for probe_type in ['q_value', 'q_value_delta', 'material_balance']:
321
+ y = torch.tensor(self.linear_probe_targets[layer_idx][bucket][probe_type]).float().unsqueeze(1)
322
  if len(y) > 0:
323
+ y_pred = self.linear_probes[layer_idx][probe_type](X)
324
+ loss = criterion(y_pred, y)
325
+ self.linear_optimizers[layer_idx][probe_type].zero_grad()
326
+ loss.backward()
327
+ self.linear_optimizers[layer_idx][probe_type].step()
328
+
329
  # Reset linear_probe_targets after training
330
  self.linear_probe_targets = {i: {bucket: {'q_value': [], 'q_value_delta': [], 'material_balance': []} for bucket in self.move_buckets} for i in self.linear_probes}
331
 
332
  def save_linear_probe_data(self, path):
333
+ torch.save(self.linear_probes, path)
 
334
 
335
  def evaluate_linear_probes(self, board: chess.Board, game_state: str):
336
  self.move_num = game_state.count('.')