HaileyStorm
committed on
Upload chess-mamba-vs-xformer/config/Mamba/50M.py with huggingface_hub
Browse files
chess-mamba-vs-xformer/config/Mamba/50M.py
CHANGED
@@ -17,14 +17,14 @@ max_seq_len = 1536
|
|
17 |
base_batch_size = 256
|
18 |
|
19 |
batch_size = 50
|
20 |
-
gradient_accumulation_steps = 2
|
21 |
effective_batch_size = batch_size * gradient_accumulation_steps
|
22 |
|
23 |
always_save_checkpoint = True
|
24 |
-
eval_interval =
|
25 |
-
eval_iters =
|
26 |
-
log_interval =
|
27 |
-
train_file_update_interval =
|
28 |
|
29 |
warmup_iters = 500 # not super necessary potentially
|
30 |
learning_rate = 1.5e-3 # tested 1.5e-3 from 112k-156k, before that 3.5e-3 #8e-3
|
@@ -64,7 +64,7 @@ d_state = 32
|
|
64 |
dt_rank = 56
|
65 |
move_num_in_gamestate = False
|
66 |
|
67 |
-
init_from = '
|
68 |
|
69 |
device = 'cuda' # run on cpu only
|
70 |
compile = False # do not torch compile the model
|
|
|
17 |
base_batch_size = 256
|
18 |
|
19 |
batch_size = 50
|
20 |
+
gradient_accumulation_steps = 2 #25
|
21 |
effective_batch_size = batch_size * gradient_accumulation_steps
|
22 |
|
23 |
always_save_checkpoint = True
|
24 |
+
eval_interval = 60
|
25 |
+
eval_iters = 1.5
|
26 |
+
log_interval = 0.01
|
27 |
+
train_file_update_interval = 1 # 23 was original ... 7 definitely crashes (maybe try 10 on Lambda)
|
28 |
|
29 |
warmup_iters = 500 # not super necessary potentially
|
30 |
learning_rate = 1.5e-3 # tested 1.5e-3 from 112k-156k, before that 3.5e-3 #8e-3
|
|
|
64 |
dt_rank = 56
|
65 |
move_num_in_gamestate = False
|
66 |
|
67 |
+
init_from = 'resume'
|
68 |
|
69 |
device = 'cuda' # run on cpu only
|
70 |
compile = False # do not torch compile the model
|