{
  "cache_dir": null,
  "model_max_length": 2560,
  "max_steps": -1,
  "num_train_epoches": 2,
  "per_device_train_batch_size": 8,
  "gradient_accumulation_steps": 1,
  "gradient_checkpointing": true,
  "eval_steps": 500000000,
  "eval_epoches": 1,
  "per_device_eval_batch_size": 4,
  "learning_rate": 1e-05,
  "weight_decay": 0.0,
  "lr_scheduler_type": "linear",
  "warmup_steps": 0,
  "warmup_ratio": 0,
  "logging_steps": 8,
  "logging_epoches": 1,
  "save_steps": 50000000000,
  "save_epoches": 1,
  "save_total_limit": 0,
  "save_best": false,
  "seed": 42,
  "num_training_steps_per_epoch": 6160,
  "num_updating_steps_per_epoch": 6160,
  "num_eval_steps_per_epoch": null,
  "num_training_steps": 12320,
  "num_training_steps_aggr_devices": 98560,
  "num_updating_steps": 12320,
  "num_updating_steps_aggr_devices": 98560,
  "num_eval_steps": null,
  "per_eval_steps": 500000000,
  "num_updating_warmup_steps": 0,
  "num_updating_warmup_steps_aggr_devices": 0,
  "num_logging_steps": 8,
  "per_save_steps": 50000000000
}