{ "best_metric": 0.6127167630057804, "best_model_checkpoint": "/home/ubuntu/utah/babylm-24/src/evaluation/results/finetune/DebertaV2-Base-10M_babylm-A/rte/checkpoint-312", "epoch": 4.0, "eval_steps": 500, "global_step": 1248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5179855823516846, "eval_f1": 0.6127167630057804, "eval_loss": 0.6988222599029541, "eval_mcc": 0.09869207097117276, "eval_runtime": 0.2603, "eval_samples_per_second": 534.027, "eval_steps_per_second": 69.155, "step": 312 }, { "epoch": 1.6025641025641026, "grad_norm": 2.8960421085357666, "learning_rate": 2.7596153846153846e-05, "loss": 0.6829, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5899280309677124, "eval_f1": 0.4864864864864865, "eval_loss": 0.6877012848854065, "eval_mcc": 0.1635282583804096, "eval_runtime": 0.2394, "eval_samples_per_second": 580.645, "eval_steps_per_second": 75.191, "step": 624 }, { "epoch": 3.0, "eval_accuracy": 0.46043166518211365, "eval_f1": 0.5714285714285714, "eval_loss": 1.203949213027954, "eval_mcc": -0.03987122665174367, "eval_runtime": 0.2479, "eval_samples_per_second": 560.646, "eval_steps_per_second": 72.602, "step": 936 }, { "epoch": 3.2051282051282053, "grad_norm": 10.899106979370117, "learning_rate": 2.5192307692307694e-05, "loss": 0.4839, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.49640288949012756, "eval_f1": 0.5454545454545454, "eval_loss": 1.811629295349121, "eval_mcc": 0.016953317256112287, "eval_runtime": 0.2418, "eval_samples_per_second": 574.756, "eval_steps_per_second": 74.429, "step": 1248 }, { "epoch": 4.0, "step": 1248, "total_flos": 587617475420160.0, "train_loss": 0.5243261960836557, "train_runtime": 82.8963, "train_samples_per_second": 600.751, "train_steps_per_second": 75.275 } ], "logging_steps": 500, "max_steps": 6240, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 587617475420160.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }