DebertaV2-Base-10M_babylm-A__mrpc / trainer_state.json
Ar4l's picture
Upload folder using huggingface_hub
e8f5476 verified
{
"best_metric": 0.8403908794788274,
"best_model_checkpoint": "/home/ubuntu/utah/babylm-24/src/evaluation/results/finetune/DebertaV2-Base-10M_babylm-A/mrpc/checkpoint-2295",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 2295,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6960784196853638,
"eval_f1": 0.8109756097560976,
"eval_loss": 0.5777735710144043,
"eval_mcc": 0.17012280758673753,
"eval_runtime": 0.347,
"eval_samples_per_second": 587.838,
"eval_steps_per_second": 74.921,
"step": 459
},
{
"epoch": 1.0893246187363834,
"grad_norm": 2.380143880844116,
"learning_rate": 1.5642701525054468e-05,
"loss": 0.6139,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.720588207244873,
"eval_f1": 0.8235294117647058,
"eval_loss": 0.5808757543563843,
"eval_mcc": 0.26984949202435965,
"eval_runtime": 0.3355,
"eval_samples_per_second": 608.081,
"eval_steps_per_second": 77.5,
"step": 918
},
{
"epoch": 2.178649237472767,
"grad_norm": 6.764563083648682,
"learning_rate": 1.1285403050108935e-05,
"loss": 0.5239,
"step": 1000
},
{
"epoch": 3.0,
"eval_accuracy": 0.7598039507865906,
"eval_f1": 0.8382838283828383,
"eval_loss": 0.6974886655807495,
"eval_mcc": 0.4042253913596411,
"eval_runtime": 0.3292,
"eval_samples_per_second": 619.643,
"eval_steps_per_second": 78.974,
"step": 1377
},
{
"epoch": 3.2679738562091503,
"grad_norm": 30.021303176879883,
"learning_rate": 6.928104575163399e-06,
"loss": 0.3466,
"step": 1500
},
{
"epoch": 4.0,
"eval_accuracy": 0.7549019455909729,
"eval_f1": 0.8387096774193549,
"eval_loss": 1.0106741189956665,
"eval_mcc": 0.3852755834560997,
"eval_runtime": 0.3222,
"eval_samples_per_second": 633.08,
"eval_steps_per_second": 80.687,
"step": 1836
},
{
"epoch": 4.357298474945534,
"grad_norm": 28.98902702331543,
"learning_rate": 2.570806100217865e-06,
"loss": 0.2213,
"step": 2000
},
{
"epoch": 5.0,
"eval_accuracy": 0.7598039507865906,
"eval_f1": 0.8403908794788274,
"eval_loss": 1.1580651998519897,
"eval_mcc": 0.40096662922574366,
"eval_runtime": 0.3547,
"eval_samples_per_second": 575.054,
"eval_steps_per_second": 73.291,
"step": 2295
},
{
"epoch": 5.0,
"step": 2295,
"total_flos": 1082018524016640.0,
"train_loss": 0.3891895601692283,
"train_runtime": 145.114,
"train_samples_per_second": 126.383,
"train_steps_per_second": 15.815
}
],
"logging_steps": 500,
"max_steps": 2295,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1082018524016640.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}