|
{ |
|
"best_metric": 0.6127167630057804, |
|
"best_model_checkpoint": "/home/ubuntu/utah/babylm-24/src/evaluation/results/finetune/DebertaV2-Base-10M_babylm-A/rte/checkpoint-312", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 1248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5179855823516846, |
|
"eval_f1": 0.6127167630057804, |
|
"eval_loss": 0.6988222599029541, |
|
"eval_mcc": 0.09869207097117276, |
|
"eval_runtime": 0.2603, |
|
"eval_samples_per_second": 534.027, |
|
"eval_steps_per_second": 69.155, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6025641025641026, |
|
"grad_norm": 2.8960421085357666, |
|
"learning_rate": 2.7596153846153846e-05, |
|
"loss": 0.6829, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5899280309677124, |
|
"eval_f1": 0.4864864864864865, |
|
"eval_loss": 0.6877012848854065, |
|
"eval_mcc": 0.1635282583804096, |
|
"eval_runtime": 0.2394, |
|
"eval_samples_per_second": 580.645, |
|
"eval_steps_per_second": 75.191, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.46043166518211365, |
|
"eval_f1": 0.5714285714285714, |
|
"eval_loss": 1.203949213027954, |
|
"eval_mcc": -0.03987122665174367, |
|
"eval_runtime": 0.2479, |
|
"eval_samples_per_second": 560.646, |
|
"eval_steps_per_second": 72.602, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.2051282051282053, |
|
"grad_norm": 10.899106979370117, |
|
"learning_rate": 2.5192307692307694e-05, |
|
"loss": 0.4839, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.49640288949012756, |
|
"eval_f1": 0.5454545454545454, |
|
"eval_loss": 1.811629295349121, |
|
"eval_mcc": 0.016953317256112287, |
|
"eval_runtime": 0.2418, |
|
"eval_samples_per_second": 574.756, |
|
"eval_steps_per_second": 74.429, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1248, |
|
"total_flos": 587617475420160.0, |
|
"train_loss": 0.5243261960836557, |
|
"train_runtime": 82.8963, |
|
"train_samples_per_second": 600.751, |
|
"train_steps_per_second": 75.275 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 587617475420160.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|