# flash-attention / adamw-apex-zero.yaml
# Hosting-page residue: uploaded by theonlyengine, commit 3f9c425, 196 bytes.
# @package train.optimizer
# Optimizer config: a ZeroRedundancyOptimizer whose `optimizer_class` argument
# is a partial of apex's FusedAdam.
_target_: torch.distributed.optim.ZeroRedundancyOptimizer
_recursive_: True
# The FusedAdam keys must be nested here. Left flush with the top level they
# would leave `optimizer_class` null and duplicate the top-level `_target_`.
optimizer_class:
  _target_: apex.optimizers.FusedAdam
  # Hand the wrapper a partial rather than an already-built optimizer.
  _partial_: True
  # NOTE(review): the original indentation was lost; adam_w_mode is assumed to
  # belong to the FusedAdam partial rather than to the top level - confirm.
  adam_w_mode: True