HaileyStorm
committed on
Update chess-gpt-eval-contrastive/mamba_module.py
chess-gpt-eval-contrastive/mamba_module.py
CHANGED
@@ -126,7 +126,7 @@ class MambaPlayer:
             tensor_output = output
             seq_len = tensor_output.shape[1]
             bucket = next(b for b in self.move_buckets if self.move_num <= b)
-            self.activations_sum[layer_idx][bucket]["current"][:, :8, :] += tensor_output.detach().cpu().numpy()[:, :
+            self.activations_sum[layer_idx][bucket]["current"][:, :min(8, self.seq_len), :] += tensor_output.detach().cpu().numpy()[:, :self.seq_len, :][:, -8:, :]
             self.activations_count[layer_idx][bucket]["current"] += 1

         self.hooks.append(layer.register_forward_hook(hook))
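The changed line now clips the hook output to the valid sequence length and accumulates its last up-to-8 positions, left-aligned in the fixed-size buffer, instead of a fixed leading [:, :8, :] slice. Below is a minimal standalone sketch of that accumulation, assuming the running-sum buffer is a NumPy array of shape (batch, 8, d_model); the helper name accumulate_last_window and the example shapes are illustrative, not from the repository.

import numpy as np
import torch

def accumulate_last_window(sum_buf, tensor_output, seq_len, window=8):
    # Keep only the valid prefix of the sequence, then take its last <= `window` positions.
    tail = tensor_output.detach().cpu().numpy()[:, :seq_len, :][:, -window:, :]
    # Left-align the tail so prompts shorter than `window` still fit the
    # fixed-size (batch, window, d_model) running-sum buffer.
    sum_buf[:, :min(window, seq_len), :] += tail

# Example: d_model=16; a 5-token prompt fills only the first 5 buffer slots.
buf = np.zeros((1, 8, 16))
accumulate_last_window(buf, torch.randn(1, 5, 16), seq_len=5)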
@@ -325,8 +325,8 @@ class MambaPlayer:
         def get_lr(it):
             warmup_iters = 0 #300 * 43
             lr_decay_iters = 3000 * 43
-            learning_rate = 0.
-            min_lr = 0.
+            learning_rate = 0.000075
+            min_lr = 0.0000075
             # 1) linear warmup for warmup_iters steps
             if it < warmup_iters:
                 return learning_rate * it / warmup_iters
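This hunk raises the learning rate from 0 to a 7.5e-5 peak with a 7.5e-6 floor. Only the warmup branch of get_lr is visible in the diff; the sketch below fills in the remainder with the usual warmup-then-cosine-decay shape, so the clamp and decay branches are assumptions about the schedule's intent, not the file's verbatim code.

import math

def get_lr(it):
    warmup_iters = 0           # 300 * 43 warmup currently disabled
    lr_decay_iters = 3000 * 43
    learning_rate = 0.000075   # new peak LR from this commit
    min_lr = 0.0000075         # new floor, 10% of the peak
    # 1) linear warmup for warmup_iters steps (never taken while warmup_iters == 0)
    if it < warmup_iters:
        return learning_rate * it / warmup_iters
    # 2) past the decay horizon, hold at the floor (assumed branch)
    if it > lr_decay_iters:
        return min_lr
    # 3) cosine decay from learning_rate down to min_lr (assumed branch)
    decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))
    return min_lr + coeff * (learning_rate - min_lr)

# Example: get_lr(0) returns 7.5e-5 at the start; get_lr(3000 * 43) returns 7.5e-6 at the end of decay.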