Spaces:

Hazem
/

Fac256xc

Runtime error

boris committed on Apr 29, 2022

Commit

d08bf8d

1 Parent(s): acc1a4a

feat(train): allow nesterov momentum

Files changed (1) hide show

tools/train/train.py CHANGED Viewed

@@ -365,6 +365,10 @@ class TrainingArguments:
             "help": "The type of grafting to use. Can be 'rmsprop_normalized' (default), 'rmsprop', 'adagrad', 'adagrad_normalized', 'sgd' or 'sqrt_n'"
         },
     )
     optim_quantized: bool = field(
         default=False,
         metadata={
@@ -857,7 +861,7 @@ def main():
             statistics_compute_steps=1,
             best_effort_shape_interpretation=True,
             graft_type=graft_type,
-            nesterov=False,
             exponent_override=0,
             statistics_partition_spec=statistics_partition_spec,
             preconditioner_partition_spec=PartitionSpec(

             "help": "The type of grafting to use. Can be 'rmsprop_normalized' (default), 'rmsprop', 'adagrad', 'adagrad_normalized', 'sgd' or 'sqrt_n'"
         },
     )
+    nesterov: bool = field(
+        default=False,
+        metadata={"help": "Use Nesterov momentum for Distributed Shampoo."},
+    )
     optim_quantized: bool = field(
         default=False,
         metadata={
             statistics_compute_steps=1,
             best_effort_shape_interpretation=True,
             graft_type=graft_type,
+            nesterov=training_args.nesterov,
             exponent_override=0,
             statistics_partition_spec=statistics_partition_spec,
             preconditioner_partition_spec=PartitionSpec(