Llama3.1-8B-MetaMath / training_args.json
KevinChenwx's picture
Upload 11 files
8a7656f verified
{
"cache_dir": null,
"model_max_length": 2560,
"max_steps": -1,
"num_train_epoches": 2,
"per_device_train_batch_size": 8,
"gradient_accumulation_steps": 1,
"gradient_checkpointing": true,
"eval_steps": 500000000,
"eval_epoches": 1,
"per_device_eval_batch_size": 4,
"learning_rate": 1e-05,
"weight_decay": 0.0,
"lr_scheduler_type": "linear",
"warmup_steps": 0,
"warmup_ratio": 0,
"logging_steps": 8,
"logging_epoches": 1,
"save_steps": 50000000000,
"save_epoches": 1,
"save_total_limit": 0,
"save_best": false,
"seed": 42,
"num_training_steps_per_epoch": 6160,
"num_updating_steps_per_epoch": 6160,
"num_eval_steps_per_epoch": null,
"num_training_steps": 12320,
"num_training_steps_aggr_devices": 98560,
"num_updating_steps": 12320,
"num_updating_steps_aggr_devices": 98560,
"num_eval_steps": null,
"per_eval_steps": 500000000,
"num_updating_warmup_steps": 0,
"num_updating_warmup_steps_aggr_devices": 0,
"num_logging_steps": 8,
"per_save_steps": 50000000000
}