Spaces:
Running
Running
feat(train): custom start_preconditioning_step
Browse files
- tools/train/train.py +5 -1
tools/train/train.py
CHANGED
@@ -248,6 +248,10 @@ class TrainingArguments:
|
|
248 |
default=1024,
|
249 |
metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
|
250 |
)
|
|
|
|
|
|
|
|
|
251 |
preconditioning_compute_steps: int = field(
|
252 |
default=10, metadata={"help": "Number of steps to update preconditioner."}
|
253 |
)
|
@@ -608,7 +612,7 @@ def main():
|
|
608 |
beta2=training_args.beta2,
|
609 |
diagonal_epsilon=1e-10,
|
610 |
matrix_epsilon=1e-8,
|
611 |
-
start_preconditioning_step=training_args.
|
612 |
preconditioning_compute_steps=training_args.preconditioning_compute_steps,
|
613 |
statistics_compute_steps=1,
|
614 |
best_effort_shape_interpretation=True,
|
|
|
248 |
default=1024,
|
249 |
metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
|
250 |
)
|
251 |
+
start_preconditioning_step: int = field(
|
252 |
+
default=100,
|
253 |
+
metadata={"help": "Number of steps before starting to update preconditioner."},
|
254 |
+
)
|
255 |
preconditioning_compute_steps: int = field(
|
256 |
default=10, metadata={"help": "Number of steps to update preconditioner."}
|
257 |
)
|
|
|
612 |
beta2=training_args.beta2,
|
613 |
diagonal_epsilon=1e-10,
|
614 |
matrix_epsilon=1e-8,
|
615 |
+
start_preconditioning_step=training_args.start_preconditioning_step,
|
616 |
preconditioning_compute_steps=training_args.preconditioning_compute_steps,
|
617 |
statistics_compute_steps=1,
|
618 |
best_effort_shape_interpretation=True,
|