teachyourselfcoding's picture
Upload 245 files
fa6856c
raw
history blame contribute delete
682 Bytes
{
"train_batch_size": 128,
"fp16": {
"enabled": true,
"min_loss_scale": 1,
"opt_level": "O2"
},
"zero_optimization": {
"stage": 2,
"offload_param": {
"device": "cpu"
},
"offload_optimizer": {
"device": "cpu"
},
"allgather_partitions": true,
"allgather_bucket_size": 5e8,
"contiguous_gradients": true
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": 1e-05,
"betas": [
0.9,
0.95
],
"eps": 1e-08
}
},
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": 0,
"warmup_max_lr": 1e-05,
"warmup_num_steps": "auto"
}
}
}